From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Dec 2008 20:12:29 +0100 Subject: performance counters: core code Implement the core kernel bits of Performance Counters subsystem. The Linux Performance Counter subsystem provides an abstraction of performance counter hardware capabilities. It provides per task and per CPU counters, and it provides event capabilities on top of those. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_counter_open() system call: int perf_counter_open(u32 hw_event_type, u32 hw_event_period, u32 record_type, pid_t pid, int cpu); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. See more details in Documentation/perf-counters.txt. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 171 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 9 +++ include/linux/syscalls.h | 6 ++ 3 files changed, 186 insertions(+) create mode 100644 include/linux/perf_counter.h (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 00000000000..22c4469abf4 --- /dev/null +++ b/include/linux/perf_counter.h @@ -0,0 +1,171 @@ +/* + * Performance counters: + * + * Copyright(C) 2008, Thomas Gleixner + * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_COUNTER_H +#define _LINUX_PERF_COUNTER_H + +#include + +#include +#include +#include +#include +#include + +struct task_struct; + +/* + * Generalized hardware event types, used by the hw_event_type parameter + * of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + PERF_COUNT_CYCLES, + PERF_COUNT_INSTRUCTIONS, + PERF_COUNT_CACHE_REFERENCES, + PERF_COUNT_CACHE_MISSES, + PERF_COUNT_BRANCH_INSTRUCTIONS, + PERF_COUNT_BRANCH_MISSES, + /* + * If this bit is set in the type, then trigger NMI sampling: + */ + PERF_COUNT_NMI = (1 << 30), +}; + +/* + * IRQ-notification data record type: + */ +enum perf_record_type { + PERF_RECORD_SIMPLE, + PERF_RECORD_IRQ, + PERF_RECORD_GROUP, +}; + +/** + * struct hw_perf_counter - performance counter hardware details + */ +struct hw_perf_counter { + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + s32 next_count; + u64 irq_period; +}; + +/* + * Hardcoded buffer length limit for now, for IRQ-fed events: + */ +#define PERF_DATA_BUFLEN 2048 + +/** + * struct perf_data - performance counter IRQ data sampling ... + */ +struct perf_data { + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; +}; + +/** + * struct perf_counter - performance counter kernel representation: + */ +struct perf_counter { + struct list_head list; + int active; +#if BITS_PER_LONG == 64 + atomic64_t count; +#else + atomic_t count32[2]; +#endif + u64 __irq_period; + + struct hw_perf_counter hw; + + struct perf_counter_context *ctx; + struct task_struct *task; + + /* + * Protect attach/detach: + */ + struct mutex mutex; + + int oncpu; + int cpu; + + s32 hw_event_type; + enum perf_record_type record_type; + + /* read() / irq related data */ + wait_queue_head_t waitq; + /* optional: for NMIs */ + int wakeup_pending; + struct perf_data *irqdata; + struct perf_data *usrdata; + struct perf_data data[2]; +}; + +/** + * struct perf_counter_context - counter context structure + * + * Used as a container for task counters and CPU counters as well: + */ +struct perf_counter_context { +#ifdef CONFIG_PERF_COUNTERS + /* + * Protect the list of counters: + */ + spinlock_t lock; + struct list_head counters; + int nr_counters; + int nr_active; + struct task_struct *task; +#endif +}; + +/** + * struct perf_counter_cpu_context - per cpu counter context structure + */ +struct perf_cpu_context { + struct perf_counter_context ctx; + struct perf_counter_context *task_ctx; + int active_oncpu; + int max_pertask; +}; + +/* + * Set by architecture code: + */ +extern int perf_max_counters; + +#ifdef CONFIG_PERF_COUNTERS +extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_tick(struct task_struct *task, int cpu); +extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_print_debug(void); +#else +static inline void +perf_counter_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_tick(struct task_struct *task, int cpu) { } +static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_print_debug(void) { } +#endif + +#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 55e30d11447..4c530278391 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,7 @@ struct sched_param { #include #include #include +#include #include #include #include @@ -1326,6 +1327,7 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif + struct perf_counter_context perf_counter_ctx; #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +/* + * Call the function if the target task is executing on a CPU right now: + */ +extern void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info); + + #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 04fb47bfb92..6cce728a626 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +asmlinkage int +sys_perf_counter_open(u32 hw_event_type, + u32 hw_event_period, + u32 record_type, + pid_t pid, + int cpu); #endif -- cgit v1.2.3-70-g09d2 From 4ac13294e44664bb7edf4daf52edb71e7c6bbe84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Dec 2008 21:43:39 +0100 Subject: perf counters: protect them against CSTATE transitions Impact: fix rare lost events problem There are CPUs whose performance counters misbehave on CSTATE transitions, so provide a way to just disable/enable them around deep idle methods. (hw_perf_enable_all() is cheap on x86.) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 14 +++++++++++++- drivers/acpi/processor_idle.c | 8 ++++++++ include/linux/perf_counter.h | 4 ++++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6a93d1f04d9..0a7f3bea2dc 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -119,10 +120,21 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_disable_all(void) +void hw_perf_restore_ctrl(u64 ctrl) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); +} +EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl); + +u64 hw_perf_disable_all(void) +{ + u64 ctrl; + + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + return ctrl; } +EXPORT_SYMBOL_GPL(hw_perf_disable_all); static inline void __hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 5f8d746a9b8..cca804e6f1d 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -270,8 +270,11 @@ static atomic_t c3_cpu_count; /* Common C-state entry for C2, C3, .. */ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) { + u64 pctrl; + /* Don't trace irqs off for idle */ stop_critical_timings(); + pctrl = hw_perf_disable_all(); if (cstate->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cstate); @@ -284,6 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } + hw_perf_restore_ctrl(pctrl); start_critical_timings(); } #endif /* !CONFIG_CPU_IDLE */ @@ -1425,8 +1429,11 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, */ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) { + u64 pctrl; + /* Don't trace irqs off for idle */ stop_critical_timings(); + pctrl = hw_perf_disable_all(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -1441,6 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } + hw_perf_restore_ctrl(pctrl); start_critical_timings(); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 22c4469abf4..5031b5614f2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void hw_perf_restore_ctrl(u64 ctrl); +extern u64 hw_perf_disable_all(void); #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -166,6 +168,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void hw_perf_restore_ctrl(u64 ctrl) { } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ -- cgit v1.2.3-70-g09d2 From eab656ae04b9d3b83265e3db01c0d2c46b748ef7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:26:59 +0100 Subject: perf counters: clean up 'raw' type API Impact: cleanup Introduce a separate hw_event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++++ include/linux/syscalls.h | 8 +++----- kernel/perf_counter.c | 15 ++++++++------- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5031b5614f2..daedd7d87c2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -38,6 +38,7 @@ enum hw_event_types { * If this bit is set in the type, then trigger NMI sampling: */ PERF_COUNT_NMI = (1 << 30), + PERF_COUNT_RAW = (1 << 31), }; /* @@ -49,6 +50,12 @@ enum perf_record_type { PERF_RECORD_GROUP, }; +struct perf_counter_event { + u32 hw_event_type; + u32 hw_event_period; + u64 hw_raw_ctrl; +}; + /** * struct hw_perf_counter - performance counter hardware details */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6cce728a626..3ecd73d03da 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; +struct perf_counter_event; #include #include @@ -625,9 +626,6 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage int -sys_perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu); +sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, + pid_t pid, int cpu, int masterfd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 20508f05365..96c333a5b0f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -734,26 +734,27 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) * @pid: target pid */ asmlinkage int -sys_perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu) +sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, + pid_t pid, int cpu, int masterfd) { struct perf_counter_context *ctx; + struct perf_counter_event event; struct perf_counter *counter; int ret; + if (copy_from_user(&event, uevent, sizeof(event)) != 0) + return -EFAULT; + ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(hw_event_period, cpu, record_type); + counter = perf_counter_alloc(event.hw_event_period, cpu, record_type); if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter, hw_event_type); + ret = hw_perf_counter_init(counter, event.hw_event_type); if (ret) goto err_free_put_context; -- cgit v1.2.3-70-g09d2 From dfa7c899b401d7dc5d85aca416aee64ac82812f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:35:37 +0100 Subject: perf counters: expand use of counter->event Impact: change syscall, cleanup Make use of the new perf_counters event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 22 +++++++++++----------- include/linux/perf_counter.h | 4 +--- kernel/perf_counter.c | 10 +++++----- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 0a7f3bea2dc..30e7ebf7827 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -56,9 +56,10 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* * Setup the hardware configuration for a given hw_event_type */ -int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) +int hw_perf_counter_init(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; + u32 hw_event_type = counter->event.hw_event_type; if (unlikely(!perf_counters_initialized)) return -EINVAL; @@ -83,7 +84,7 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = counter->__irq_period; + hwc->irq_period = counter->event.hw_event_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -95,21 +96,19 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) hwc->next_count = -((s32) hwc->irq_period); /* - * Negative event types mean raw encoded event+umask values: + * Raw event type provide the config in the event structure */ - if (hw_event_type < 0) { - counter->hw_event_type = -hw_event_type; - counter->hw_event_type &= ~PERF_COUNT_NMI; + hw_event_type &= ~PERF_COUNT_NMI; + if (hw_event_type == PERF_COUNT_RAW) { + hwc->config |= counter->event.hw_raw_ctrl; } else { - hw_event_type &= ~PERF_COUNT_NMI; if (hw_event_type >= max_intel_perfmon_events) return -EINVAL; /* * The generic map: */ - counter->hw_event_type = intel_perfmon_event_map[hw_event_type]; + hwc->config |= intel_perfmon_event_map[hw_event_type]; } - hwc->config |= counter->hw_event_type; counter->wakeup_pending = 0; return 0; @@ -373,7 +372,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->hw_event_type); + perf_store_irq_data(leader, counter->event.hw_event_type); perf_store_irq_data(leader, atomic64_counter_read(counter)); } } @@ -418,7 +417,8 @@ again: perf_store_irq_data(counter, instruction_pointer(regs)); break; case PERF_RECORD_GROUP: - perf_store_irq_data(counter, counter->hw_event_type); + perf_store_irq_data(counter, + counter->event.hw_event_type); perf_store_irq_data(counter, atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index daedd7d87c2..1f0017673e7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -96,8 +96,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - u64 __irq_period; - + struct perf_counter_event event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -111,7 +110,6 @@ struct perf_counter { int oncpu; int cpu; - s32 hw_event_type; enum perf_record_type record_type; /* read() / irq related data */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 96c333a5b0f..2557c670a3b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,7 +37,7 @@ static DEFINE_MUTEX(perf_resource_mutex); * Architecture provided APIs - weak aliases: */ -int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type) +int __weak hw_perf_counter_init(struct perf_counter *counter) { return -EINVAL; } @@ -707,7 +707,7 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) +perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -722,7 +722,7 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) counter->usrdata = &counter->data[1]; counter->cpu = cpu; counter->record_type = record_type; - counter->__irq_period = hw_event_period; + counter->event = *event; counter->wakeup_pending = 0; return counter; @@ -750,11 +750,11 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(event.hw_event_period, cpu, record_type); + counter = perf_counter_alloc(&event, cpu, record_type); if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter, event.hw_event_type); + ret = hw_perf_counter_init(counter); if (ret) goto err_free_put_context; -- cgit v1.2.3-70-g09d2 From 9f66a3810fe0d4100972db84290f3ae4a4d77025 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 10 Dec 2008 12:33:23 +0100 Subject: perf counters: restructure the API Impact: clean up new API Thorough cleanup of the new perf counters API, we now get clean separation of the various concepts: - introduce perf_counter_hw_event to separate out the event source details - move special type flags into separate attributes: PERF_COUNT_NMI, PERF_COUNT_RAW - extend the type to u64 and reserve it fully to the architecture in the raw type case. And make use of all these changes in the core and x86 perfcounters code. Also change the syscall signature to: asmlinkage int sys_perf_counter_open( struct perf_counter_hw_event *hw_event_uptr __user, pid_t pid, int cpu, int group_fd); ( Note that group_fd is unused for now - it's reserved for the counter groups abstraction. ) Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 29 ++++++----- include/linux/perf_counter.h | 98 ++++++++++++++++++++++++-------------- include/linux/syscalls.h | 12 +++-- kernel/perf_counter.c | 38 ++++++++------- 4 files changed, 106 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 30e7ebf7827..ef1936a871a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); */ int hw_perf_counter_init(struct perf_counter *counter) { + struct perf_counter_hw_event *hw_event = &counter->hw_event; struct hw_perf_counter *hwc = &counter->hw; - u32 hw_event_type = counter->event.hw_event_type; if (unlikely(!perf_counters_initialized)) return -EINVAL; @@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 0; if (capable(CAP_SYS_ADMIN)) { hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - if (hw_event_type & PERF_COUNT_NMI) + if (hw_event->nmi) hwc->nmi = 1; } - hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; - hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; + hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; + hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = counter->event.hw_event_period; + hwc->irq_period = hw_event->irq_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter) if (!hwc->irq_period) hwc->irq_period = 0x7FFFFFFF; - hwc->next_count = -((s32) hwc->irq_period); + hwc->next_count = -(s32)hwc->irq_period; /* * Raw event type provide the config in the event structure */ - hw_event_type &= ~PERF_COUNT_NMI; - if (hw_event_type == PERF_COUNT_RAW) { - hwc->config |= counter->event.hw_raw_ctrl; + if (hw_event->raw) { + hwc->config |= hw_event->type; } else { - if (hw_event_type >= max_intel_perfmon_events) + if (hw_event->type >= max_intel_perfmon_events) return -EINVAL; /* * The generic map: */ - hwc->config |= intel_perfmon_event_map[hw_event_type]; + hwc->config |= intel_perfmon_event_map[hw_event->type]; } counter->wakeup_pending = 0; @@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) int bit; list_for_each_entry(counter, &ctx->counters, list) { - if (counter->record_type != PERF_RECORD_SIMPLE || + if (counter->hw_event.record_type != PERF_RECORD_SIMPLE || counter == leader) continue; @@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->event.hw_event_type); + perf_store_irq_data(leader, counter->hw_event.type); perf_store_irq_data(leader, atomic64_counter_read(counter)); } } @@ -410,7 +409,7 @@ again: perf_save_and_restart(counter); - switch (counter->record_type) { + switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: continue; case PERF_RECORD_IRQ: @@ -418,7 +417,7 @@ again: break; case PERF_RECORD_GROUP: perf_store_irq_data(counter, - counter->event.hw_event_type); + counter->hw_event.type); perf_store_irq_data(counter, atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1f0017673e7..a2b4852e2d7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -24,65 +24,93 @@ struct task_struct; /* - * Generalized hardware event types, used by the hw_event_type parameter - * of the sys_perf_counter_open() syscall: + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: */ enum hw_event_types { - PERF_COUNT_CYCLES, - PERF_COUNT_INSTRUCTIONS, - PERF_COUNT_CACHE_REFERENCES, - PERF_COUNT_CACHE_MISSES, - PERF_COUNT_BRANCH_INSTRUCTIONS, - PERF_COUNT_BRANCH_MISSES, /* - * If this bit is set in the type, then trigger NMI sampling: + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_NMI = (1 << 30), - PERF_COUNT_RAW = (1 << 31), + PERF_COUNT_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* * IRQ-notification data record type: */ -enum perf_record_type { - PERF_RECORD_SIMPLE, - PERF_RECORD_IRQ, - PERF_RECORD_GROUP, +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; -struct perf_counter_event { - u32 hw_event_type; - u32 hw_event_period; - u64 hw_raw_ctrl; +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + u64 type; + + u64 irq_period; + u32 record_type; + + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + __reserved_1 : 29; + + u64 __reserved_2; }; +/* + * Kernel-internal data types: + */ + /** - * struct hw_perf_counter - performance counter hardware details + * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - u64 prev_count; - s32 next_count; - u64 irq_period; + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + u64 irq_period; + s32 next_count; }; /* * Hardcoded buffer length limit for now, for IRQ-fed events: */ -#define PERF_DATA_BUFLEN 2048 +#define PERF_DATA_BUFLEN 2048 /** * struct perf_data - performance counter IRQ data sampling ... */ struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; }; /** @@ -96,7 +124,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - struct perf_counter_event event; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -110,8 +138,6 @@ struct perf_counter { int oncpu; int cpu; - enum perf_record_type record_type; - /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3ecd73d03da..a549678b7c3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; -struct perf_counter_event; +struct perf_counter_hw_event; #include #include @@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int -sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, - pid_t pid, int cpu, int masterfd); + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2557c670a3b..0d323ceda3a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_counter *counter = file->private_data; - switch (counter->record_type) { + switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: return perf_read_hw(counter, buf, count); @@ -707,7 +707,7 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) +perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) INIT_LIST_HEAD(&counter->list); init_waitqueue_head(&counter->waitq); - counter->irqdata = &counter->data[0]; - counter->usrdata = &counter->data[1]; - counter->cpu = cpu; - counter->record_type = record_type; - counter->event = *event; - counter->wakeup_pending = 0; + counter->irqdata = &counter->data[0]; + counter->usrdata = &counter->data[1]; + counter->cpu = cpu; + counter->hw_event = *hw_event; + counter->wakeup_pending = 0; return counter; } /** - * sys_perf_task_open - open a performance counter associate it to a task - * @hw_event_type: event type for monitoring/sampling... + * sys_perf_task_open - open a performance counter, associate it to a task/cpu + * + * @hw_event_uptr: event type attributes for monitoring/sampling * @pid: target pid + * @cpu: target cpu + * @group_fd: group leader counter fd */ -asmlinkage int -sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, - pid_t pid, int cpu, int masterfd) +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd) + { struct perf_counter_context *ctx; - struct perf_counter_event event; + struct perf_counter_hw_event hw_event; struct perf_counter *counter; int ret; - if (copy_from_user(&event, uevent, sizeof(event)) != 0) + if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; ctx = find_get_context(pid, cpu); @@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(&event, cpu, record_type); + counter = perf_counter_alloc(&hw_event, cpu); if (!counter) goto err_put_context; -- cgit v1.2.3-70-g09d2 From 04289bb9891882202d7e961c4c04d2376930e9f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 08:38:42 +0100 Subject: perf counters: add support for group counters Impact: add group counters This patch adds the "counter groups" abstraction. Groups of counters behave much like normal 'single' counters, with a few semantic and behavioral extensions on top of that. A counter group is created by creating a new counter with the open() syscall's group-leader group_fd file descriptor parameter pointing to another, already existing counter. Groups of counters are scheduled in and out in one atomic group, and they are also roundrobin-scheduled atomically. Counters that are member of a group can also record events with an (atomic) extended timestamp that extends to all members of the group, if the record type is set to PERF_RECORD_GROUP. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 28 ++-- include/linux/perf_counter.h | 8 +- kernel/perf_counter.c | 282 ++++++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ef1936a871a..54b4ad0cce6 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -346,18 +346,22 @@ static void perf_save_and_restart(struct perf_counter *counter) } static void -perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) +perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) { - struct perf_counter_context *ctx = leader->ctx; - struct perf_counter *counter; + struct perf_counter *counter, *group_leader = sibling->group_leader; int bit; - list_for_each_entry(counter, &ctx->counters, list) { - if (counter->hw_event.record_type != PERF_RECORD_SIMPLE || - counter == leader) - continue; + /* + * Store the counter's own timestamp first: + */ + perf_store_irq_data(sibling, sibling->hw_event.type); + perf_store_irq_data(sibling, atomic64_counter_read(sibling)); - if (counter->active) { + /* + * Then store sibling timestamps (if any): + */ + list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + if (!counter->active) { /* * When counter was not in the overflow mask, we have to * read it from hardware. We read it as well, when it @@ -371,8 +375,8 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->hw_event.type); - perf_store_irq_data(leader, atomic64_counter_read(counter)); + perf_store_irq_data(sibling, counter->hw_event.type); + perf_store_irq_data(sibling, atomic64_counter_read(counter)); } } @@ -416,10 +420,6 @@ again: perf_store_irq_data(counter, instruction_pointer(regs)); break; case PERF_RECORD_GROUP: - perf_store_irq_data(counter, - counter->hw_event.type); - perf_store_irq_data(counter, - atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); break; } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a2b4852e2d7..7af7d896546 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -117,7 +117,10 @@ struct perf_data { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { - struct list_head list; + struct list_head list_entry; + struct list_head sibling_list; + struct perf_counter *group_leader; + int active; #if BITS_PER_LONG == 64 atomic64_t count; @@ -158,7 +161,8 @@ struct perf_counter_context { * Protect the list of counters: */ spinlock_t lock; - struct list_head counters; + + struct list_head counter_list; int nr_counters; int nr_active; struct task_struct *task; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0d323ceda3a..fa59fe8c02d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { } * Read the cached counter in counter safe against cross CPU / NMI * modifications. 64 bit version - no complications. */ -static inline u64 perf_read_counter_safe(struct perf_counter *counter) +static inline u64 perf_counter_read_safe(struct perf_counter *counter) { return (u64) atomic64_read(&counter->count); } @@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter) * Read the cached counter in counter safe against cross CPU / NMI * modifications. 32 bit version. */ -static u64 perf_read_counter_safe(struct perf_counter *counter) +static u64 perf_counter_read_safe(struct perf_counter *counter) { u32 cntl, cnth; @@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter) #endif +static void +list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +{ + struct perf_counter *group_leader = counter->group_leader; + + /* + * Depending on whether it is a standalone or sibling counter, + * add it straight to the context's counter list, or to the group + * leader's sibling list: + */ + if (counter->group_leader == counter) + list_add_tail(&counter->list_entry, &ctx->counter_list); + else + list_add_tail(&counter->list_entry, &group_leader->sibling_list); +} + +static void +list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +{ + struct perf_counter *sibling, *tmp; + + list_del_init(&counter->list_entry); + + if (list_empty(&counter->sibling_list)) + return; + + /* + * If this was a group counter with sibling counters then + * upgrade the siblings to singleton counters by adding them + * to the context list directly: + */ + list_for_each_entry_safe(sibling, tmp, + &counter->sibling_list, list_entry) { + + list_del_init(&sibling->list_entry); + list_add_tail(&sibling->list_entry, &ctx->counter_list); + WARN_ON_ONCE(!sibling->group_leader); + WARN_ON_ONCE(sibling->group_leader == sibling); + sibling->group_leader = sibling; + } +} + /* * Cross CPU call to remove a performance counter * * We disable the counter on the hardware level first. After that we * remove it from the context list. */ -static void __perf_remove_from_context(void *info) +static void __perf_counter_remove_from_context(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; @@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info) * counters on a global level. NOP for non NMI based counters. */ hw_perf_disable_all(); - list_del_init(&counter->list); + list_del_counter(counter, ctx); hw_perf_enable_all(); if (!ctx->task) { @@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info) * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. */ -static void perf_remove_from_context(struct perf_counter *counter) +static void perf_counter_remove_from_context(struct perf_counter *counter) { struct perf_counter_context *ctx = counter->ctx; struct task_struct *task = ctx->task; @@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter) * the removal is always sucessful. */ smp_call_function_single(counter->cpu, - __perf_remove_from_context, + __perf_counter_remove_from_context, counter, 1); return; } retry: - task_oncpu_function_call(task, __perf_remove_from_context, + task_oncpu_function_call(task, __perf_counter_remove_from_context, counter); spin_lock_irq(&ctx->lock); /* * If the context is active we need to retry the smp call. */ - if (ctx->nr_active && !list_empty(&counter->list)) { + if (ctx->nr_active && !list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } /* * The lock prevents that this context is scheduled in so we - * can remove the counter safely, if it the call above did not + * can remove the counter safely, if the call above did not * succeed. */ - if (!list_empty(&counter->list)) { + if (!list_empty(&counter->list_entry)) { ctx->nr_counters--; - list_del_init(&counter->list); + list_del_counter(counter, ctx); counter->task = NULL; } spin_unlock_irq(&ctx->lock); @@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info) * counters on a global level. NOP for non NMI based counters. */ hw_perf_disable_all(); - list_add_tail(&counter->list, &ctx->counters); + list_add_counter(counter, ctx); hw_perf_enable_all(); ctx->nr_counters++; @@ -268,7 +311,7 @@ retry: * If the context is active and the counter has not been added * we need to retry the smp call. */ - if (ctx->nr_active && list_empty(&counter->list)) { + if (ctx->nr_active && list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } @@ -278,13 +321,45 @@ retry: * can add the counter safely, if it the call above did not * succeed. */ - if (list_empty(&counter->list)) { - list_add_tail(&counter->list, &ctx->counters); + if (list_empty(&counter->list_entry)) { + list_add_counter(counter, ctx); ctx->nr_counters++; } spin_unlock_irq(&ctx->lock); } +static void +counter_sched_out(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + if (!counter->active) + return; + + hw_perf_counter_disable(counter); + counter->active = 0; + counter->oncpu = -1; + + cpuctx->active_oncpu--; + ctx->nr_active--; +} + +static void +group_sched_out(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + struct perf_counter *counter; + + counter_sched_out(group_counter, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_out(counter, cpuctx, ctx); +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) return; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counters, list) { - if (!ctx->nr_active) - break; - if (counter->active) { - hw_perf_counter_disable(counter); - counter->active = 0; - counter->oncpu = -1; - ctx->nr_active--; - cpuctx->active_oncpu--; - } + if (ctx->nr_active) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) + group_sched_out(counter, cpuctx, ctx); } spin_unlock(&ctx->lock); cpuctx->task_ctx = NULL; } +static void +counter_sched_in(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + if (!counter->active) + return; + + hw_perf_counter_enable(counter); + counter->active = 1; + counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + + cpuctx->active_oncpu++; + ctx->nr_active++; +} + +static void +group_sched_in(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + struct perf_counter *counter; + + counter_sched_in(group_counter, cpuctx, ctx, cpu); + + /* + * Schedule in siblings as one group (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_in(counter, cpuctx, ctx, cpu); +} + /* * Called from scheduler to add the counters of the current task * with interrupts disabled. @@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) return; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counters, list) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (ctx->nr_active == cpuctx->max_pertask) break; + + /* + * Listen to the 'cpu' scheduling filter constraint + * of counters: + */ if (counter->cpu != -1 && counter->cpu != cpu) continue; - hw_perf_counter_enable(counter); - counter->active = 1; - counter->oncpu = cpu; - ctx->nr_active++; - cpuctx->active_oncpu++; + group_sched_in(counter, cpuctx, ctx, cpu); } spin_unlock(&ctx->lock); + cpuctx->task_ctx = ctx; } @@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) spin_lock(&ctx->lock); /* - * Rotate the first entry last: + * Rotate the first entry last (works just fine for group counters too): */ hw_perf_disable_all(); - list_for_each_entry(counter, &ctx->counters, list) { - list_del(&counter->list); - list_add_tail(&counter->list, &ctx->counters); + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + list_del(&counter->list_entry); + list_add_tail(&counter->list_entry, &ctx->counter_list); break; } hw_perf_enable_all(); @@ -386,17 +490,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) perf_counter_task_sched_in(curr, cpu); } +/* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + spin_lock_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->counter_list); + ctx->nr_counters = 0; + ctx->task = task; +} /* * Initialize the perf_counter context in task_struct */ void perf_counter_init_task(struct task_struct *task) { - struct perf_counter_context *ctx = &task->perf_counter_ctx; - - spin_lock_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->counters); - ctx->nr_counters = 0; - ctx->task = task; + __perf_counter_init_context(&task->perf_counter_ctx, task); } /* @@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info) hw_perf_counter_read(info); } -static u64 perf_read_counter(struct perf_counter *counter) +static u64 perf_counter_read(struct perf_counter *counter) { /* * If counter is enabled and currently active on a CPU, update the @@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter) __hw_perf_counter_read, counter, 1); } - return perf_read_counter_safe(counter); + return perf_counter_read_safe(counter); } /* @@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_lock(&counter->mutex); - perf_remove_from_context(counter); + perf_counter_remove_from_context(counter); put_context(ctx); mutex_unlock(&counter->mutex); @@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) return -EINVAL; mutex_lock(&counter->mutex); - cntval = perf_read_counter(counter); + cntval = perf_counter_read(counter); mutex_unlock(&counter->mutex); return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); @@ -707,15 +818,25 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) +perf_counter_alloc(struct perf_counter_hw_event *hw_event, + int cpu, + struct perf_counter *group_leader) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return NULL; + /* + * Single counters are their own group leaders, with an + * empty sibling list: + */ + if (!group_leader) + group_leader = counter; + mutex_init(&counter->mutex); - INIT_LIST_HEAD(&counter->list); + INIT_LIST_HEAD(&counter->list_entry); + INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); counter->irqdata = &counter->data[0]; @@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) counter->cpu = cpu; counter->hw_event = *hw_event; counter->wakeup_pending = 0; + counter->group_leader = group_leader; return counter; } @@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open( int group_fd) { - struct perf_counter_context *ctx; + struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; - struct perf_counter *counter; + struct perf_counter_context *ctx; + struct file *group_file = NULL; + int fput_needed = 0; int ret; if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; + /* + * Look up the group leader: + */ + group_leader = NULL; + if (group_fd != -1) { + ret = -EINVAL; + group_file = fget_light(group_fd, &fput_needed); + if (!group_file) + goto out_fput; + if (group_file->f_op != &perf_fops) + goto out_fput; + + group_leader = group_file->private_data; + /* + * Do not allow a recursive hierarchy: + */ + if (group_leader->group_leader) + goto out_fput; + } + + /* + * Get the target context (task or percpu): + */ ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(&hw_event, cpu); + counter = perf_counter_alloc(&hw_event, cpu, group_leader); if (!counter) goto err_put_context; @@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open( if (ret < 0) goto err_remove_free_put_context; +out_fput: + fput_light(group_file, fput_needed); + return ret; err_remove_free_put_context: mutex_lock(&counter->mutex); - perf_remove_from_context(counter); + perf_counter_remove_from_context(counter); mutex_unlock(&counter->mutex); err_free_put_context: @@ -783,40 +933,40 @@ err_free_put_context: err_put_context: put_context(ctx); - return ret; + goto out_fput; } -static void __cpuinit perf_init_cpu(int cpu) +static void __cpuinit perf_counter_init_cpu(int cpu) { - struct perf_cpu_context *ctx; + struct perf_cpu_context *cpuctx; - ctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_init(&ctx->ctx.lock); - INIT_LIST_HEAD(&ctx->ctx.counters); + cpuctx = &per_cpu(perf_cpu_context, cpu); + __perf_counter_init_context(&cpuctx->ctx, NULL); mutex_lock(&perf_resource_mutex); - ctx->max_pertask = perf_max_counters - perf_reserved_percpu; + cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; mutex_unlock(&perf_resource_mutex); + hw_perf_counter_setup(); } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_exit_cpu(void *info) +static void __perf_counter_exit_cpu(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter_context *ctx = &cpuctx->ctx; struct perf_counter *counter, *tmp; - list_for_each_entry_safe(counter, tmp, &ctx->counters, list) - __perf_remove_from_context(counter); + list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) + __perf_counter_remove_from_context(counter); } -static void perf_exit_cpu(int cpu) +static void perf_counter_exit_cpu(int cpu) { - smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1); + smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); } #else -static inline void perf_exit_cpu(int cpu) { } +static inline void perf_counter_exit_cpu(int cpu) { } #endif static int __cpuinit @@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - perf_init_cpu(cpu); + perf_counter_init_cpu(cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - perf_exit_cpu(cpu); + perf_counter_exit_cpu(cpu); break; default: -- cgit v1.2.3-70-g09d2 From 621a01eac89b5e2f81a4cf576568b31f40a02724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 12:46:46 +0100 Subject: perf counters: hw driver API Impact: restructure code, introduce hw_ops driver abstraction Introduce this abstraction to handle counter details: struct hw_perf_counter_ops { void (*hw_perf_counter_enable) (struct perf_counter *counter); void (*hw_perf_counter_disable) (struct perf_counter *counter); void (*hw_perf_counter_read) (struct perf_counter *counter); }; This will be useful to support assymetric hw details, and it will also be useful to implement "software counters". (Counters that count kernel managed sw events such as pagefaults, context-switches, wall-clock time or task-local time.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 37 ++++++++++++++++++++++--------- include/linux/perf_counter.h | 15 +++++++++++++ kernel/perf_counter.c | 45 ++++++++++++++++++++------------------ 3 files changed, 66 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 54b4ad0cce6..718b635dece 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -56,7 +56,7 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* * Setup the hardware configuration for a given hw_event_type */ -int hw_perf_counter_init(struct perf_counter *counter) +static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_hw_event *hw_event = &counter->hw_event; struct hw_perf_counter *hwc = &counter->hw; @@ -135,7 +135,7 @@ u64 hw_perf_disable_all(void) EXPORT_SYMBOL_GPL(hw_perf_disable_all); static inline void -__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) +__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) { wrmsr(hwc->config_base + idx, hwc->config, 0); } @@ -149,13 +149,13 @@ static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx) wrmsr(hwc->counter_base + idx, hwc->next_count, 0); } -static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } -void hw_perf_counter_enable(struct perf_counter *counter) +static void x86_perf_counter_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -170,12 +170,12 @@ void hw_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __hw_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(hwc, idx); cpuc->counters[idx] = counter; __hw_perf_counter_set_period(hwc, idx); - __hw_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(hwc, idx); } #ifdef CONFIG_X86_64 @@ -282,20 +282,20 @@ void perf_counter_print_debug(void) local_irq_enable(); } -void hw_perf_counter_disable(struct perf_counter *counter) +static void x86_perf_counter_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __hw_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; __hw_perf_save_counter(counter, hwc, idx); } -void hw_perf_counter_read(struct perf_counter *counter) +static void x86_perf_counter_read(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; unsigned long addr = hwc->counter_base + hwc->idx; @@ -342,7 +342,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __hw_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(hwc, idx); } static void @@ -572,3 +572,20 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } + +static struct hw_perf_counter_ops x86_perf_counter_ops = { + .hw_perf_counter_enable = x86_perf_counter_enable, + .hw_perf_counter_disable = x86_perf_counter_disable, + .hw_perf_counter_read = x86_perf_counter_read, +}; + +struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter) +{ + int err; + + err = __hw_perf_counter_init(counter); + if (err) + return NULL; + + return &x86_perf_counter_ops; +} diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7af7d896546..27385641ecb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -113,6 +113,17 @@ struct perf_data { u8 data[PERF_DATA_BUFLEN]; }; +struct perf_counter; + +/** + * struct hw_perf_counter_ops - performance counter hw ops + */ +struct hw_perf_counter_ops { + void (*hw_perf_counter_enable) (struct perf_counter *counter); + void (*hw_perf_counter_disable) (struct perf_counter *counter); + void (*hw_perf_counter_read) (struct perf_counter *counter); +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -120,6 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; + struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -185,6 +197,9 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter); + extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 278209c547a..e6e41ca9546 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,18 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ - -int __weak hw_perf_counter_init(struct perf_counter *counter) +extern __weak struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) { - return -EINVAL; + return ERR_PTR(-EINVAL); } -void __weak hw_perf_counter_enable(struct perf_counter *counter) { } -void __weak hw_perf_counter_disable(struct perf_counter *counter) { } -void __weak hw_perf_counter_read(struct perf_counter *counter) { } -void __weak hw_perf_disable_all(void) { } -void __weak hw_perf_enable_all(void) { } -void __weak hw_perf_counter_setup(void) { } +void __weak hw_perf_disable_all(void) { } +void __weak hw_perf_enable_all(void) { } +void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -146,7 +143,7 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); if (counter->active) { - hw_perf_counter_disable(counter); + counter->hw_ops->hw_perf_counter_disable(counter); counter->active = 0; ctx->nr_active--; cpuctx->active_oncpu--; @@ -257,7 +254,7 @@ static void __perf_install_in_context(void *info) ctx->nr_counters++; if (cpuctx->active_oncpu < perf_max_counters) { - hw_perf_counter_enable(counter); + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; ctx->nr_active++; @@ -333,7 +330,7 @@ counter_sched_out(struct perf_counter *counter, if (!counter->active) return; - hw_perf_counter_disable(counter); + counter->hw_ops->hw_perf_counter_disable(counter); counter->active = 0; counter->oncpu = -1; @@ -392,7 +389,7 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - hw_perf_counter_enable(counter); + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -509,7 +506,9 @@ void perf_counter_init_task(struct task_struct *task) */ static void __hw_perf_counter_read(void *info) { - hw_perf_counter_read(info); + struct perf_counter *counter = info; + + counter->hw_ops->hw_perf_counter_read(counter); } static u64 perf_counter_read(struct perf_counter *counter) @@ -816,8 +815,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, struct perf_counter *group_leader) { - struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); + struct hw_perf_counter_ops *hw_ops; + struct perf_counter *counter; + counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return NULL; @@ -839,6 +840,14 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->hw_event = *hw_event; counter->wakeup_pending = 0; counter->group_leader = group_leader; + counter->hw_ops = NULL; + + hw_ops = hw_perf_counter_init(counter); + if (!hw_ops) { + kfree(counter); + return NULL; + } + counter->hw_ops = hw_ops; return counter; } @@ -908,10 +917,6 @@ asmlinkage int sys_perf_counter_open( if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter); - if (ret) - goto err_free_put_context; - perf_install_in_context(ctx, counter, cpu); ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); @@ -927,8 +932,6 @@ err_remove_free_put_context: mutex_lock(&counter->mutex); perf_counter_remove_from_context(counter); mutex_unlock(&counter->mutex); - -err_free_put_context: kfree(counter); err_put_context: -- cgit v1.2.3-70-g09d2 From 5c92d12411dfe5f0f3d1b1c1e2f756245e6f7249 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:21:10 +0100 Subject: perf counters: implement PERF_COUNT_CPU_CLOCK Impact: add new perf-counter type The 'CPU clock' counter counts the amount of CPU clock time that is elapsing, in nanoseconds. (regardless of how much of it the task is spending on a CPU executing) This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 36 ++------------- include/linux/perf_counter.h | 9 ++-- kernel/perf_counter.c | 95 ++++++++++++++++++++++++++++++++------ 3 files changed, 92 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 718b635dece..43c8e9a38b4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -178,35 +178,6 @@ static void x86_perf_counter_enable(struct perf_counter *counter) __x86_perf_counter_enable(hwc, idx); } -#ifdef CONFIG_X86_64 -static inline void atomic64_counter_set(struct perf_counter *counter, u64 val) -{ - atomic64_set(&counter->count, val); -} - -static inline u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic64_read(&counter->count); -} -#else -/* - * Todo: add proper atomic64_t support to 32-bit x86: - */ -static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64) -{ - u32 *val32 = (void *)&val64; - - atomic_set(counter->count32 + 0, *(val32 + 0)); - atomic_set(counter->count32 + 1, *(val32 + 1)); -} - -static inline u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic_read(counter->count32 + 0) | - (u64) atomic_read(counter->count32 + 1) << 32; -} -#endif - static void __hw_perf_save_counter(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { @@ -309,7 +280,7 @@ static void x86_perf_counter_read(struct perf_counter *counter) } while (offs != hwc->prev_count); val32 = (s32) val; - val = (s64)hwc->irq_period + (s64)val32; + val = (s64)hwc->irq_period + (s64)val32; atomic64_counter_set(counter, hwc->prev_count + val); } @@ -573,13 +544,14 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } -static struct hw_perf_counter_ops x86_perf_counter_ops = { +static const struct hw_perf_counter_ops x86_perf_counter_ops = { .hw_perf_counter_enable = x86_perf_counter_enable, .hw_perf_counter_disable = x86_perf_counter_disable, .hw_perf_counter_read = x86_perf_counter_read, }; -struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter) +const struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) { int err; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 27385641ecb..9a1713a1be2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -131,7 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; - struct hw_perf_counter_ops *hw_ops; + const struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -197,7 +197,7 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern struct hw_perf_counter_ops * +extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); @@ -208,6 +208,9 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern void hw_perf_restore_ctrl(u64 ctrl); extern u64 hw_perf_disable_all(void); +extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); +extern u64 atomic64_counter_read(struct perf_counter *counter); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -219,7 +222,7 @@ static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e6e41ca9546..506286e5ba6 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,15 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ -extern __weak struct hw_perf_counter_ops * +extern __weak const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter) { return ERR_PTR(-EINVAL); } -void __weak hw_perf_disable_all(void) { } -void __weak hw_perf_enable_all(void) { } -void __weak hw_perf_counter_setup(void) { } +u64 __weak hw_perf_disable_all(void) { return 0; } +void __weak hw_perf_restore_ctrl(u64 ctrl) { } +void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -58,6 +58,16 @@ static inline u64 perf_counter_read_safe(struct perf_counter *counter) return (u64) atomic64_read(&counter->count); } +void atomic64_counter_set(struct perf_counter *counter, u64 val) +{ + atomic64_set(&counter->count, val); +} + +u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic64_read(&counter->count); +} + #else /* @@ -79,6 +89,20 @@ static u64 perf_counter_read_safe(struct perf_counter *counter) return cntl | ((u64) cnth) << 32; } +void atomic64_counter_set(struct perf_counter *counter, u64 val64) +{ + u32 *val32 = (void *)&val64; + + atomic_set(counter->count32 + 0, *(val32 + 0)); + atomic_set(counter->count32 + 1, *(val32 + 1)); +} + +u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic_read(counter->count32 + 0) | + (u64) atomic_read(counter->count32 + 1) << 32; +} + #endif static void @@ -131,6 +155,7 @@ static void __perf_counter_remove_from_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + u64 perf_flags; /* * If this is a task context, we need to check whether it is @@ -155,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_del_counter(counter, ctx); - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); if (!ctx->task) { /* @@ -232,6 +257,7 @@ static void __perf_install_in_context(void *info) struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; int cpu = smp_processor_id(); + u64 perf_flags; /* * If this is a task context, we need to check whether it is @@ -247,9 +273,9 @@ static void __perf_install_in_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_add_counter(counter, ctx); - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); ctx->nr_counters++; @@ -457,6 +483,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) { struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + u64 perf_flags; if (likely(!ctx->nr_counters)) return; @@ -468,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Rotate the first entry last (works just fine for group counters too): */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { list_del(&counter->list_entry); list_add_tail(&counter->list_entry, &ctx->counter_list); break; } - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); spin_unlock(&ctx->lock); @@ -807,6 +834,42 @@ static const struct file_operations perf_fops = { .poll = perf_poll, }; +static void cpu_clock_perf_counter_enable(struct perf_counter *counter) +{ +} + +static void cpu_clock_perf_counter_disable(struct perf_counter *counter) +{ +} + +static void cpu_clock_perf_counter_read(struct perf_counter *counter) +{ + int cpu = raw_smp_processor_id(); + + atomic64_counter_set(counter, cpu_clock(cpu)); +} + +static const struct hw_perf_counter_ops perf_ops_cpu_clock = { + .hw_perf_counter_enable = cpu_clock_perf_counter_enable, + .hw_perf_counter_disable = cpu_clock_perf_counter_disable, + .hw_perf_counter_read = cpu_clock_perf_counter_read, +}; + +static const struct hw_perf_counter_ops * +sw_perf_counter_init(struct perf_counter *counter) +{ + const struct hw_perf_counter_ops *hw_ops = NULL; + + switch (counter->hw_event.type) { + case PERF_COUNT_CPU_CLOCK: + hw_ops = &perf_ops_cpu_clock; + break; + default: + break; + } + return hw_ops; +} + /* * Allocate and initialize a counter structure */ @@ -815,7 +878,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, struct perf_counter *group_leader) { - struct hw_perf_counter_ops *hw_ops; + const struct hw_perf_counter_ops *hw_ops; struct perf_counter *counter; counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -842,7 +905,13 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->hw_ops = NULL; - hw_ops = hw_perf_counter_init(counter); + hw_ops = NULL; + if (!hw_event->raw && hw_event->type < 0) + hw_ops = sw_perf_counter_init(counter); + if (!hw_ops) { + hw_ops = hw_perf_counter_init(counter); + } + if (!hw_ops) { kfree(counter); return NULL; @@ -912,7 +981,7 @@ asmlinkage int sys_perf_counter_open( goto err_put_context; } - ret = -ENOMEM; + ret = -EINVAL; counter = perf_counter_alloc(&hw_event, cpu, group_leader); if (!counter) goto err_put_context; -- cgit v1.2.3-70-g09d2 From 01b2838c4298c5e0d30b4993c195ac34dd9df61e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:45:51 +0100 Subject: perf counters: consolidate hw_perf save/restore APIs Impact: cleanup Rename them to better match up the usual IRQ disable/enable APIs: hw_perf_disable_all() => hw_perf_save_disable() hw_perf_restore_ctrl() => hw_perf_restore() Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 8 ++++---- drivers/acpi/processor_idle.c | 10 +++++----- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 16 ++++++++-------- 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 43c8e9a38b4..3e1dbebe22b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -118,13 +118,13 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_restore_ctrl(u64 ctrl) +void hw_perf_restore(u64 ctrl) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); } -EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl); +EXPORT_SYMBOL_GPL(hw_perf_restore); -u64 hw_perf_disable_all(void) +u64 hw_perf_save_disable(void) { u64 ctrl; @@ -132,7 +132,7 @@ u64 hw_perf_disable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); return ctrl; } -EXPORT_SYMBOL_GPL(hw_perf_disable_all); +EXPORT_SYMBOL_GPL(hw_perf_save_disable); static inline void __x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index cca804e6f1d..a3e66a33b7a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -270,11 +270,11 @@ static atomic_t c3_cpu_count; /* Common C-state entry for C2, C3, .. */ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) { - u64 pctrl; + u64 perf_flags; /* Don't trace irqs off for idle */ stop_critical_timings(); - pctrl = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); if (cstate->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cstate); @@ -287,7 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } - hw_perf_restore_ctrl(pctrl); + hw_perf_restore(perf_flags); start_critical_timings(); } #endif /* !CONFIG_CPU_IDLE */ @@ -1433,7 +1433,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) /* Don't trace irqs off for idle */ stop_critical_timings(); - pctrl = hw_perf_disable_all(); + pctrl = hw_perf_save_disable(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -1448,7 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } - hw_perf_restore_ctrl(pctrl); + hw_perf_restore(pctrl); start_critical_timings(); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9a1713a1be2..68f6e3ad531 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -67,7 +67,7 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - u64 type; + s64 type; u64 irq_period; u32 record_type; @@ -206,8 +206,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); -extern void hw_perf_restore_ctrl(u64 ctrl); -extern u64 hw_perf_disable_all(void); +extern u64 hw_perf_save_disable(void); +extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); @@ -221,8 +221,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } -static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline void hw_perf_restore(u64 ctrl) { } +static inline u64 hw_perf_save_disable(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 506286e5ba6..0e93fea1712 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -43,8 +43,8 @@ hw_perf_counter_init(struct perf_counter *counter) return ERR_PTR(-EINVAL); } -u64 __weak hw_perf_disable_all(void) { return 0; } -void __weak hw_perf_restore_ctrl(u64 ctrl) { } +u64 __weak hw_perf_save_disable(void) { return 0; } +void __weak hw_perf_restore(u64 ctrl) { } void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -180,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_del_counter(counter, ctx); - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); if (!ctx->task) { /* @@ -273,9 +273,9 @@ static void __perf_install_in_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_add_counter(counter, ctx); - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); ctx->nr_counters++; @@ -495,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Rotate the first entry last (works just fine for group counters too): */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { list_del(&counter->list_entry); list_add_tail(&counter->list_entry, &ctx->counter_list); break; } - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); -- cgit v1.2.3-70-g09d2 From bae43c9945ebeef15e7952e317efb02393d3bfc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:03:20 +0100 Subject: perf counters: implement PERF_COUNT_TASK_CLOCK Impact: add new perf-counter type The 'task clock' counter counts the amount of time a task is executing, in nanoseconds. It stops ticking when a task is scheduled out either due to it blocking, sleeping or it being preempted. This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++-- kernel/perf_counter.c | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 68f6e3ad531..30c0ec8c1ee 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -50,8 +50,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, + /* + * Future software events: + */ + /* PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, */ }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0e93fea1712..a0fe8474ee2 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -855,6 +855,25 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .hw_perf_counter_read = cpu_clock_perf_counter_read, }; +static void task_clock_perf_counter_enable(struct perf_counter *counter) +{ +} + +static void task_clock_perf_counter_disable(struct perf_counter *counter) +{ +} + +static void task_clock_perf_counter_read(struct perf_counter *counter) +{ + atomic64_counter_set(counter, current->se.sum_exec_runtime); +} + +static const struct hw_perf_counter_ops perf_ops_task_clock = { + .hw_perf_counter_enable = task_clock_perf_counter_enable, + .hw_perf_counter_disable = task_clock_perf_counter_disable, + .hw_perf_counter_read = task_clock_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -864,6 +883,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; break; + case PERF_COUNT_TASK_CLOCK: + hw_ops = &perf_ops_task_clock; + break; default: break; } -- cgit v1.2.3-70-g09d2 From 1d1c7ddbfab358445a542715551301b7fc363e28 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:59:31 +0100 Subject: perf counters: add prctl interface to disable/enable counters Add a way for self-monitoring tasks to disable/enable counters summarily, via a prctl: PR_TASK_PERF_COUNTERS_DISABLE 31 PR_TASK_PERF_COUNTERS_ENABLE 32 Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++ include/linux/prctl.h | 3 ++ kernel/perf_counter.c | 86 ++++++++++++++++++++++++++++++++++++++++---- kernel/sys.c | 7 ++++ 4 files changed, 93 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 30c0ec8c1ee..97d86c293ee 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -213,6 +213,8 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); +extern int perf_counter_task_disable(void); +extern int perf_counter_task_enable(void); #else static inline void @@ -226,6 +228,8 @@ static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } +static inline int perf_counter_task_disable(void) { return -EINVAL; } +static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e..b00df4c79c6 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,4 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a0fe8474ee2..4e679b91d8b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -415,6 +415,9 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { + if (counter->active == -1) + return; + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -479,6 +482,79 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) cpuctx->task_ctx = ctx; } +int perf_counter_task_disable(void) +{ + struct task_struct *curr = current; + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter *counter; + u64 perf_flags; + int cpu; + + if (likely(!ctx->nr_counters)) + return 0; + + local_irq_disable(); + cpu = smp_processor_id(); + + perf_counter_task_sched_out(curr, cpu); + + spin_lock(&ctx->lock); + + /* + * Disable all the counters: + */ + perf_flags = hw_perf_save_disable(); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + WARN_ON_ONCE(counter->active == 1); + counter->active = -1; + } + hw_perf_restore(perf_flags); + + spin_unlock(&ctx->lock); + + local_irq_enable(); + + return 0; +} + +int perf_counter_task_enable(void) +{ + struct task_struct *curr = current; + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter *counter; + u64 perf_flags; + int cpu; + + if (likely(!ctx->nr_counters)) + return 0; + + local_irq_disable(); + cpu = smp_processor_id(); + + spin_lock(&ctx->lock); + + /* + * Disable all the counters: + */ + perf_flags = hw_perf_save_disable(); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->active != -1) + continue; + counter->active = 0; + } + hw_perf_restore(perf_flags); + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(curr, cpu); + + local_irq_enable(); + + return 0; +} + void perf_counter_task_tick(struct task_struct *curr, int cpu) { struct perf_counter_context *ctx = &curr->perf_counter_ctx; @@ -951,13 +1027,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, * @cpu: target cpu * @group_fd: group leader counter fd */ -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd) - +asmlinkage int +sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, int cpu, int group_fd) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d1..0f66633be31 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1716,6 +1717,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; + case PR_TASK_PERF_COUNTERS_DISABLE: + error = perf_counter_task_disable(); + break; + case PR_TASK_PERF_COUNTERS_ENABLE: + error = perf_counter_task_enable(); + break; case PR_GET_TIMERSLACK: error = current->timer_slack_ns; break; -- cgit v1.2.3-70-g09d2 From 6a930700c8b655a9e25e42fc4adc0b225ebbcefc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 15:17:03 +0100 Subject: perf counters: clean up state transitions Impact: cleanup Introduce a proper enum for the 3 states of a counter: PERF_COUNTER_STATE_OFF = -1 PERF_COUNTER_STATE_INACTIVE = 0 PERF_COUNTER_STATE_ACTIVE = 1 and rename counter->active to counter->state and propagate the changes everywhere. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 2 +- include/linux/perf_counter.h | 11 ++++++++++- kernel/perf_counter.c | 29 ++++++++++++++--------------- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3e1dbebe22b..4854cca7fff 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -332,7 +332,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) * Then store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - if (!counter->active) { + if (counter->state != PERF_COUNTER_STATE_ACTIVE) { /* * When counter was not in the overflow mask, we have to * read it from hardware. We read it as well, when it diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 97d86c293ee..8cb095fa442 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -127,6 +127,15 @@ struct hw_perf_counter_ops { void (*hw_perf_counter_read) (struct perf_counter *counter); }; +/** + * enum perf_counter_active_state - the states of a counter + */ +enum perf_counter_active_state { + PERF_COUNTER_STATE_OFF = -1, + PERF_COUNTER_STATE_INACTIVE = 0, + PERF_COUNTER_STATE_ACTIVE = 1, +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -136,7 +145,7 @@ struct perf_counter { struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; - int active; + enum perf_counter_active_state state; #if BITS_PER_LONG == 64 atomic64_t count; #else diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4e679b91d8b..559130b8774 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -167,9 +167,9 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); - if (counter->active) { + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->hw_perf_counter_disable(counter); - counter->active = 0; + counter->state = PERF_COUNTER_STATE_INACTIVE; ctx->nr_active--; cpuctx->active_oncpu--; counter->task = NULL; @@ -281,7 +281,7 @@ static void __perf_install_in_context(void *info) if (cpuctx->active_oncpu < perf_max_counters) { counter->hw_ops->hw_perf_counter_enable(counter); - counter->active = 1; + counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; @@ -328,7 +328,6 @@ retry: spin_lock_irq(&ctx->lock); /* - * If the context is active and the counter has not been added * we need to retry the smp call. */ if (ctx->nr_active && list_empty(&counter->list_entry)) { @@ -353,12 +352,12 @@ counter_sched_out(struct perf_counter *counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx) { - if (!counter->active) + if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; counter->hw_ops->hw_perf_counter_disable(counter); - counter->active = 0; - counter->oncpu = -1; + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->oncpu = -1; cpuctx->active_oncpu--; ctx->nr_active--; @@ -415,11 +414,11 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - if (counter->active == -1) + if (counter->state == PERF_COUNTER_STATE_OFF) return; counter->hw_ops->hw_perf_counter_enable(counter); - counter->active = 1; + counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ cpuctx->active_oncpu++; @@ -506,8 +505,8 @@ int perf_counter_task_disable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - WARN_ON_ONCE(counter->active == 1); - counter->active = -1; + WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE); + counter->state = PERF_COUNTER_STATE_OFF; } hw_perf_restore(perf_flags); @@ -540,9 +539,9 @@ int perf_counter_task_enable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->active != -1) + if (counter->state != PERF_COUNTER_STATE_OFF) continue; - counter->active = 0; + counter->state = PERF_COUNTER_STATE_INACTIVE; } hw_perf_restore(perf_flags); @@ -620,7 +619,7 @@ static u64 perf_counter_read(struct perf_counter *counter) * If counter is enabled and currently active on a CPU, update the * value in the counter structure: */ - if (counter->active) { + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, __hw_perf_counter_read, counter, 1); } @@ -673,7 +672,7 @@ static struct perf_data *perf_switch_irq_data(struct perf_counter *counter) retry: spin_lock_irq(&ctx->lock); - if (!counter->active) { + if (counter->state != PERF_COUNTER_STATE_ACTIVE) { counter->irqdata = counter->usrdata; counter->usrdata = oldirqdata; spin_unlock_irq(&ctx->lock); -- cgit v1.2.3-70-g09d2 From ee06094f8279e1312fc0a31591320cc7b6f0ab1e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Dec 2008 09:00:03 +0100 Subject: perfcounters: restructure x86 counter math Impact: restructure code Change counter math from absolute values to clear delta logic. We try to extract elapsed deltas from the raw hw counter - and put that into the generic counter. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/cpu/perf_counter.c | 230 ++++++++++++++++++++----------------- include/linux/perf_counter.h | 15 ++- kernel/perf_counter.c | 68 +---------- 4 files changed, 137 insertions(+), 178 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f2fdc186724..fe94490bab6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -643,7 +643,7 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) - select HAVE_PERF_COUNTERS + select HAVE_PERF_COUNTERS if (!M386 && !M486) config X86_IO_APIC def_bool y diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b903f8df72b..5afae13d8d5 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -53,6 +53,48 @@ const int intel_perfmon_event_map[] = const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); +/* + * Propagate counter elapsed time into the generic counter. + * Can only be executed on the CPU where the counter is active. + * Returns the delta events processed. + */ +static void +x86_perf_counter_update(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) +{ + u64 prev_raw_count, new_raw_count, delta; + + WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE); + /* + * Careful: an NMI might modify the previous counter value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic counter atomically: + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + rdmsrl(hwc->counter_base + idx, new_raw_count); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (counter-)time and add that to the generic counter. + * + * Careful, not all hw sign-extends above the physical width + * of the count, so we do that by clipping the delta to 32 bits: + */ + delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); + WARN_ON_ONCE((int)delta < 0); + + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &hwc->period_left); +} + /* * Setup the hardware configuration for a given hw_event_type */ @@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * so we install an artificial 1<<31 period regardless of * the generic counter period: */ - if (!hwc->irq_period) + if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) hwc->irq_period = 0x7FFFFFFF; - hwc->next_count = -(s32)hwc->irq_period; + atomic64_set(&hwc->period_left, hwc->irq_period); /* * Raw event type provide the config in the event structure @@ -118,12 +160,6 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_restore(u64 ctrl) -{ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); -} -EXPORT_SYMBOL_GPL(hw_perf_restore); - u64 hw_perf_save_disable(void) { u64 ctrl; @@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void) } EXPORT_SYMBOL_GPL(hw_perf_save_disable); +void hw_perf_restore(u64 ctrl) +{ + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); +} +EXPORT_SYMBOL_GPL(hw_perf_restore); + static inline void -__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) +__x86_perf_counter_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int idx) { - wrmsr(hwc->config_base + idx, hwc->config, 0); + int err; + + err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); + WARN_ON_ONCE(err); } -static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]); +static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); -static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx) +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the counter disabled in hw: + */ +static void +__hw_perf_counter_set_period(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { - per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count; + s32 left = atomic64_read(&hwc->period_left); + s32 period = hwc->irq_period; + + WARN_ON_ONCE(period <= 0); + + /* + * If we are way outside a reasoable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + } - wrmsr(hwc->counter_base + idx, hwc->next_count, 0); + WARN_ON_ONCE(left <= 0); + + per_cpu(prev_left[idx], smp_processor_id()) = left; + + /* + * The hw counter starts counting from this counter offset, + * mark it to be able to extra future deltas: + */ + atomic64_set(&hwc->prev_count, (u64)(s64)-left); + + wrmsr(hwc->counter_base + idx, -left, 0); } -static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +static void +__x86_perf_counter_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } +/* + * Find a PMC slot for the freshly enabled / scheduled in counter: + */ static void x86_perf_counter_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __x86_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(counter, hwc, idx); cpuc->counters[idx] = counter; - __hw_perf_counter_set_period(hwc, idx); - __x86_perf_counter_enable(hwc, idx); -} - -static void __hw_perf_save_counter(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 raw = -1; - s64 delta; - - /* - * Get the raw hw counter value: - */ - rdmsrl(hwc->counter_base + idx, raw); - - /* - * Rebase it to zero (it started counting at -irq_period), - * to see the delta since ->prev_count: - */ - delta = (s64)hwc->irq_period + (s64)(s32)raw; - - atomic64_counter_set(counter, hwc->prev_count + delta); - - /* - * Adjust the ->prev_count offset - if we went beyond - * irq_period of units, then we got an IRQ and the counter - * was set back to -irq_period: - */ - while (delta >= (s64)hwc->irq_period) { - hwc->prev_count += hwc->irq_period; - delta -= (s64)hwc->irq_period; - } - - /* - * Calculate the next raw counter value we'll write into - * the counter at the next sched-in time: - */ - delta -= (s64)hwc->irq_period; - - hwc->next_count = (s32)delta; + __hw_perf_counter_set_period(counter, hwc, idx); + __x86_perf_counter_enable(counter, hwc, idx); } void perf_counter_print_debug(void) { - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count; + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; int cpu, idx; if (!nr_hw_counters) @@ -241,14 +286,14 @@ void perf_counter_print_debug(void) rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); - next_count = per_cpu(prev_next_count[idx], cpu); + prev_left = per_cpu(prev_left[idx], cpu); printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n", cpu, idx, pmc_count); - printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n", - cpu, idx, next_count); + printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n", + cpu, idx, prev_left); } local_irq_enable(); } @@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __x86_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; - __hw_perf_save_counter(counter, hwc, idx); -} -static void x86_perf_counter_read(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - unsigned long addr = hwc->counter_base + hwc->idx; - s64 offs, val = -1LL; - s32 val32; - - /* Careful: NMI might modify the counter offset */ - do { - offs = hwc->prev_count; - rdmsrl(addr, val); - } while (offs != hwc->prev_count); - - val32 = (s32) val; - val = (s64)hwc->irq_period + (s64)val32; - atomic64_counter_set(counter, hwc->prev_count + val); + /* + * Drain the remaining delta count out of a counter + * that we are disabling: + */ + x86_perf_counter_update(counter, hwc, idx); } static void perf_store_irq_data(struct perf_counter *counter, u64 data) @@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data) } /* - * NMI-safe enable method: + * Save and restart an expired counter. Called by NMI contexts, + * so it has to be careful about preempting normal counter ops: */ static void perf_save_and_restart(struct perf_counter *counter) { @@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter) rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); - __hw_perf_save_counter(counter, hwc, idx); - __hw_perf_counter_set_period(hwc, idx); + x86_perf_counter_update(counter, hwc, idx); + __hw_perf_counter_set_period(counter, hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __x86_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(counter, hwc, idx); } static void perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) { struct perf_counter *counter, *group_leader = sibling->group_leader; - int bit; - - /* - * Store the counter's own timestamp first: - */ - perf_store_irq_data(sibling, sibling->hw_event.type); - perf_store_irq_data(sibling, atomic64_counter_read(sibling)); /* - * Then store sibling timestamps (if any): + * Store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) { - /* - * When counter was not in the overflow mask, we have to - * read it from hardware. We read it as well, when it - * has not been read yet and clear the bit in the - * status mask. - */ - bit = counter->hw.idx; - if (!test_bit(bit, (unsigned long *) overflown) || - test_bit(bit, (unsigned long *) status)) { - clear_bit(bit, (unsigned long *) status); - perf_save_and_restart(counter); - } - } + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); perf_store_irq_data(sibling, counter->hw_event.type); - perf_store_irq_data(sibling, atomic64_counter_read(counter)); + perf_store_irq_data(sibling, atomic64_read(&counter->count)); } } @@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } +static void x86_perf_counter_read(struct perf_counter *counter) +{ + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); +} + static const struct hw_perf_counter_ops x86_perf_counter_ops = { .hw_perf_counter_enable = x86_perf_counter_enable, .hw_perf_counter_disable = x86_perf_counter_disable, diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8cb095fa442..72460289c65 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -91,14 +91,16 @@ struct perf_counter_hw_event { * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { +#ifdef CONFIG_PERF_COUNTERS u64 config; unsigned long config_base; unsigned long counter_base; int nmi; unsigned int idx; - u64 prev_count; + atomic64_t prev_count; u64 irq_period; - s32 next_count; + atomic64_t period_left; +#endif }; /* @@ -140,17 +142,15 @@ enum perf_counter_active_state { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { +#ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; -#if BITS_PER_LONG == 64 atomic64_t count; -#else - atomic_t count32[2]; -#endif + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -172,6 +172,7 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; +#endif }; /** @@ -220,8 +221,6 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); -extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); -extern u64 atomic64_counter_read(struct perf_counter *counter); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 559130b8774..416861ce8b2 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -44,67 +44,9 @@ hw_perf_counter_init(struct perf_counter *counter) } u64 __weak hw_perf_save_disable(void) { return 0; } -void __weak hw_perf_restore(u64 ctrl) { } +void __weak hw_perf_restore(u64 ctrl) { } void __weak hw_perf_counter_setup(void) { } -#if BITS_PER_LONG == 64 - -/* - * Read the cached counter in counter safe against cross CPU / NMI - * modifications. 64 bit version - no complications. - */ -static inline u64 perf_counter_read_safe(struct perf_counter *counter) -{ - return (u64) atomic64_read(&counter->count); -} - -void atomic64_counter_set(struct perf_counter *counter, u64 val) -{ - atomic64_set(&counter->count, val); -} - -u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic64_read(&counter->count); -} - -#else - -/* - * Read the cached counter in counter safe against cross CPU / NMI - * modifications. 32 bit version. - */ -static u64 perf_counter_read_safe(struct perf_counter *counter) -{ - u32 cntl, cnth; - - local_irq_disable(); - do { - cnth = atomic_read(&counter->count32[1]); - cntl = atomic_read(&counter->count32[0]); - } while (cnth != atomic_read(&counter->count32[1])); - - local_irq_enable(); - - return cntl | ((u64) cnth) << 32; -} - -void atomic64_counter_set(struct perf_counter *counter, u64 val64) -{ - u32 *val32 = (void *)&val64; - - atomic_set(counter->count32 + 0, *(val32 + 0)); - atomic_set(counter->count32 + 1, *(val32 + 1)); -} - -u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic_read(counter->count32 + 0) | - (u64) atomic_read(counter->count32 + 1) << 32; -} - -#endif - static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -280,11 +222,11 @@ static void __perf_install_in_context(void *info) ctx->nr_counters++; if (cpuctx->active_oncpu < perf_max_counters) { - counter->hw_ops->hw_perf_counter_enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; + counter->hw_ops->hw_perf_counter_enable(counter); } if (!ctx->task && cpuctx->max_pertask) @@ -624,7 +566,7 @@ static u64 perf_counter_read(struct perf_counter *counter) __hw_perf_counter_read, counter, 1); } - return perf_counter_read_safe(counter); + return atomic64_read(&counter->count); } /* @@ -921,7 +863,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); - atomic64_counter_set(counter, cpu_clock(cpu)); + atomic64_set(&counter->count, cpu_clock(cpu)); } static const struct hw_perf_counter_ops perf_ops_cpu_clock = { @@ -940,7 +882,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter) { - atomic64_counter_set(counter, current->se.sum_exec_runtime); + atomic64_set(&counter->count, current->se.sum_exec_runtime); } static const struct hw_perf_counter_ops perf_ops_task_clock = { -- cgit v1.2.3-70-g09d2 From 9b51f66dcb09ac5eb6bc68fc111d5c7a1e0131d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 13:49:45 +0100 Subject: perfcounters: implement "counter inheritance" Impact: implement new performance feature Counter inheritance can be used to run performance counters in a workload, transparently - and pipe back the counter results to the parent counter. Inheritance for performance counters works the following way: when creating a counter it can be marked with the .inherit=1 flag. Such counters are then 'inherited' by all child tasks (be they fork()-ed or clone()-ed). These counters get inherited through exec() boundaries as well (except through setuid boundaries). The counter values get added back to the parent counter(s) when the child task(s) exit - much like stime/utime statistics are gathered. So inherited counters are ideal to gather summary statistics about an application's behavior via shell commands, without having to modify that application. The timec.c command utilizes counter inheritance: http://redhat.com/~mingo/perfcounters/timec.c Sample output: $ ./timec -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null Performance counter stats for 'ls': 163516953 instructions 2295 cache-misses 2855182 branch-misses Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 +++-- kernel/exit.c | 7 +- kernel/perf_counter.c | 248 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 228 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 72460289c65..e5d25bf8f74 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -75,10 +75,11 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - __reserved_1 : 29; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + __reserved_1 : 28; u64 __reserved_2; }; @@ -138,6 +139,8 @@ enum perf_counter_active_state { PERF_COUNTER_STATE_ACTIVE = 1, }; +struct file; + /** * struct perf_counter - performance counter kernel representation: */ @@ -156,7 +159,10 @@ struct perf_counter { struct perf_counter_context *ctx; struct task_struct *task; + struct file *filp; + unsigned int nr_inherited; + struct perf_counter *parent; /* * Protect attach/detach: */ @@ -210,13 +216,16 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern void +perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_init_task(struct task_struct *child); +extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); @@ -226,12 +235,15 @@ extern int perf_counter_task_enable(void); #else static inline void +perf_counter_show(struct perf_counter *counter, char *str, int trace) { } +static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_init_task(struct task_struct *child) { } +static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } diff --git a/kernel/exit.c b/kernel/exit.c index 2d8be7ebb0f..d336c90a5f1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1093,11 +1093,12 @@ NORET_TYPE void do_exit(long code) mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif -#ifdef CONFIG_FUTEX /* - * This must happen late, after the PID is not - * hashed anymore: + * These must happen late, after the PID is not + * hashed anymore, but still at a point that may sleep: */ + perf_counter_exit_task(tsk); +#ifdef CONFIG_FUTEX if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); if (unlikely(current->pi_state_cache)) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 416861ce8b2..f5e81dd193d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_del_init(&sibling->list_entry); list_add_tail(&sibling->list_entry, &ctx->counter_list); - WARN_ON_ONCE(!sibling->group_leader); - WARN_ON_ONCE(sibling->group_leader == sibling); sibling->group_leader = sibling; } } @@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + unsigned long flags; u64 perf_flags; /* @@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->hw_perf_counter_disable(counter); @@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock(&ctx->lock); + spin_unlock_irqrestore(&ctx->lock, flags); } @@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info) struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; int cpu = smp_processor_id(); + unsigned long flags; u64 perf_flags; /* @@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); /* * Protect the list operation against NMI by disabling the @@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info) if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; - spin_unlock(&ctx->lock); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -446,10 +446,9 @@ int perf_counter_task_disable(void) */ perf_flags = hw_perf_save_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE); + list_for_each_entry(counter, &ctx->counter_list, list_entry) counter->state = PERF_COUNTER_STATE_OFF; - } + hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); @@ -525,26 +524,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) perf_counter_task_sched_in(curr, cpu); } -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - spin_lock_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->counter_list); - ctx->nr_counters = 0; - ctx->task = task; -} -/* - * Initialize the perf_counter context in task_struct - */ -void perf_counter_init_task(struct task_struct *task) -{ - __perf_counter_init_context(&task->perf_counter_ctx, task); -} - /* * Cross CPU call to read the hardware counter */ @@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = &cpuctx->ctx; - WARN_ON_ONCE(ctx->task); return ctx; } @@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter) static struct perf_counter * perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, - struct perf_counter *group_leader) + struct perf_counter *group_leader, + gfp_t gfpflags) { const struct hw_perf_counter_ops *hw_ops; struct perf_counter *counter; - counter = kzalloc(sizeof(*counter), GFP_KERNEL); + counter = kzalloc(sizeof(*counter), gfpflags); if (!counter) return NULL; @@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; if (!hw_event->raw && hw_event->type < 0) hw_ops = sw_perf_counter_init(counter); - if (!hw_ops) { + if (!hw_ops) hw_ops = hw_perf_counter_init(counter); - } if (!hw_ops) { kfree(counter); @@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; struct perf_counter_context *ctx; + struct file *counter_file = NULL; struct file *group_file = NULL; int fput_needed = 0; + int fput_needed2 = 0; int ret; if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) @@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, } ret = -EINVAL; - counter = perf_counter_alloc(&hw_event, cpu, group_leader); + counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL); if (!counter) goto err_put_context; - perf_install_in_context(ctx, counter, cpu); - ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); if (ret < 0) - goto err_remove_free_put_context; + goto err_free_put_context; + + counter_file = fget_light(ret, &fput_needed2); + if (!counter_file) + goto err_free_put_context; + + counter->filp = counter_file; + perf_install_in_context(ctx, counter, cpu); + + fput_light(counter_file, fput_needed2); out_fput: fput_light(group_file, fput_needed); return ret; -err_remove_free_put_context: - mutex_lock(&counter->mutex); - perf_counter_remove_from_context(counter); - mutex_unlock(&counter->mutex); +err_free_put_context: kfree(counter); err_put_context: @@ -1044,6 +1028,186 @@ err_put_context: goto out_fput; } +/* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->counter_list); + ctx->task = task; +} + +/* + * inherit a counter from parent task to child task: + */ +static int +inherit_counter(struct perf_counter *parent_counter, + struct task_struct *parent, + struct perf_counter_context *parent_ctx, + struct task_struct *child, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *child_counter; + + child_counter = perf_counter_alloc(&parent_counter->hw_event, + parent_counter->cpu, NULL, + GFP_ATOMIC); + if (!child_counter) + return -ENOMEM; + + /* + * Link it up in the child's context: + */ + child_counter->ctx = child_ctx; + child_counter->task = child; + list_add_counter(child_counter, child_ctx); + child_ctx->nr_counters++; + + child_counter->parent = parent_counter; + parent_counter->nr_inherited++; + /* + * inherit into child's child as well: + */ + child_counter->hw_event.inherit = 1; + + /* + * Get a reference to the parent filp - we will fput it + * when the child counter exits. This is safe to do because + * we are in the parent and we know that the filp still + * exists and has a nonzero count: + */ + atomic_long_inc(&parent_counter->filp->f_count); + + return 0; +} + +static void +__perf_counter_exit_task(struct task_struct *child, + struct perf_counter *child_counter, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *parent_counter; + u64 parent_val, child_val; + u64 perf_flags; + + /* + * Disable and unlink this counter. + * + * Be careful about zapping the list - IRQ/NMI context + * could still be processing it: + */ + local_irq_disable(); + perf_flags = hw_perf_save_disable(); + + if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) + child_counter->hw_ops->hw_perf_counter_disable(child_counter); + list_del_init(&child_counter->list_entry); + + hw_perf_restore(perf_flags); + local_irq_enable(); + + parent_counter = child_counter->parent; + /* + * It can happen that parent exits first, and has counters + * that are still around due to the child reference. These + * counters need to be zapped - but otherwise linger. + */ + if (!parent_counter) + return; + + parent_val = atomic64_read(&parent_counter->count); + child_val = atomic64_read(&child_counter->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_counter->count); + + fput(parent_counter->filp); + + kfree(child_counter); +} + +/* + * When a child task exist, feed back counter values to parent counters. + * + * Note: we are running in child context, but the PID is not hashed + * anymore so new counters will not be added. + */ +void perf_counter_exit_task(struct task_struct *child) +{ + struct perf_counter *child_counter, *tmp; + struct perf_counter_context *child_ctx; + + child_ctx = &child->perf_counter_ctx; + + if (likely(!child_ctx->nr_counters)) + return; + + list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, + list_entry) + __perf_counter_exit_task(child, child_counter, child_ctx); +} + +/* + * Initialize the perf_counter context in task_struct + */ +void perf_counter_init_task(struct task_struct *child) +{ + struct perf_counter_context *child_ctx, *parent_ctx; + struct perf_counter *counter, *parent_counter; + struct task_struct *parent = current; + unsigned long flags; + + child_ctx = &child->perf_counter_ctx; + parent_ctx = &parent->perf_counter_ctx; + + __perf_counter_init_context(child_ctx, child); + + /* + * This is executed from the parent task context, so inherit + * counters that have been marked for cloning: + */ + + if (likely(!parent_ctx->nr_counters)) + return; + + /* + * Lock the parent list. No need to lock the child - not PID + * hashed yet and not running, so nobody can access it. + */ + spin_lock_irqsave(&parent_ctx->lock, flags); + + /* + * We dont have to disable NMIs - we are only looking at + * the list, not manipulating it: + */ + list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { + if (!counter->hw_event.inherit || counter->group_leader != counter) + continue; + + /* + * Instead of creating recursive hierarchies of counters, + * we link inheritd counters back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + parent_counter = counter; + if (counter->parent) + parent_counter = counter->parent; + + if (inherit_counter(parent_counter, parent, + parent_ctx, child, child_ctx)) + break; + } + + spin_unlock_irqrestore(&parent_ctx->lock, flags); +} + static void __cpuinit perf_counter_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; -- cgit v1.2.3-70-g09d2 From 5d6a27d8a096868ae313f71f563b06074a7e34fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:28:33 +0100 Subject: perfcounters: add context switch counter Impact: add new feature, new sw counter Add a counter that counts the number of context-switches a task is doing. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- kernel/perf_counter.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e5d25bf8f74..d2a16563415 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,8 +53,8 @@ enum hw_event_types { /* * Future software events: */ - /* PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, */ + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1f81cde0dc4..09287091c52 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .hw_perf_counter_read = task_clock_perf_counter_read, }; +static u64 get_context_switches(void) +{ + struct task_struct *curr = current; + + return curr->nvcsw + curr->nivcsw; +} + +static void context_switches_perf_counter_update(struct perf_counter *counter) +{ + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = get_context_switches(); + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); +} + +static void context_switches_perf_counter_read(struct perf_counter *counter) +{ + context_switches_perf_counter_update(counter); +} + +static void context_switches_perf_counter_enable(struct perf_counter *counter) +{ + /* + * ->nvcsw + curr->nivcsw is a per-task value already, + * so we dont have to clear it on switch-in. + */ +} + +static void context_switches_perf_counter_disable(struct perf_counter *counter) +{ + context_switches_perf_counter_update(counter); +} + +static const struct hw_perf_counter_ops perf_ops_context_switches = { + .hw_perf_counter_enable = context_switches_perf_counter_enable, + .hw_perf_counter_disable = context_switches_perf_counter_disable, + .hw_perf_counter_read = context_switches_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -900,6 +948,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_TASK_CLOCK: hw_ops = &perf_ops_task_clock; break; + case PERF_COUNT_CONTEXT_SWITCHES: + hw_ops = &perf_ops_context_switches; + break; default: break; } -- cgit v1.2.3-70-g09d2 From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:34:15 +0100 Subject: perfcounters: add task migrations counter Impact: add new feature, new sw counter Add a counter that counts the number of cross-CPU migrations a task is suffering. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +++++--- include/linux/sched.h | 3 ++- kernel/perf_counter.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ kernel/sched.c | 7 +++++-- 4 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d2a16563415..f30486fc55d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -42,6 +42,8 @@ enum hw_event_types { PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_HW_EVENTS_MAX = 6, + /* * Special "software" counters provided by the kernel, even if * the hardware does not support performance counters. These @@ -50,11 +52,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - /* - * Future software events: - */ PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c530278391..2e15be8fc79 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1014,6 +1014,8 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 nr_migrations; + #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; @@ -1029,7 +1031,6 @@ struct sched_entity { u64 exec_max; u64 slice_max; - u64 nr_migrations; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 09287091c52..fb11e351e44 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -936,6 +936,52 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .hw_perf_counter_read = context_switches_perf_counter_read, }; +static inline u64 get_cpu_migrations(void) +{ + return current->se.nr_migrations; +} + +static void cpu_migrations_perf_counter_update(struct perf_counter *counter) +{ + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = get_cpu_migrations(); + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); +} + +static void cpu_migrations_perf_counter_read(struct perf_counter *counter) +{ + cpu_migrations_perf_counter_update(counter); +} + +static void cpu_migrations_perf_counter_enable(struct perf_counter *counter) +{ + /* + * se.nr_migrations is a per-task value already, + * so we dont have to clear it on switch-in. + */ +} + +static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) +{ + cpu_migrations_perf_counter_update(counter); +} + +static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { + .hw_perf_counter_enable = cpu_migrations_perf_counter_enable, + .hw_perf_counter_disable = cpu_migrations_perf_counter_disable, + .hw_perf_counter_read = cpu_migrations_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -951,6 +997,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_CONTEXT_SWITCHES: hw_ops = &perf_ops_context_switches; break; + case PERF_COUNT_CPU_MIGRATIONS: + hw_ops = &perf_ops_cpu_migrations; + break; default: break; } diff --git a/kernel/sched.c b/kernel/sched.c index 5c3f4106314..382cfdb5e38 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1852,12 +1852,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) p->se.sleep_start -= clock_offset; if (p->se.block_start) p->se.block_start -= clock_offset; +#endif if (old_cpu != new_cpu) { - schedstat_inc(p, se.nr_migrations); + p->se.nr_migrations++; +#ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); - } #endif + } p->se.vruntime -= old_cfsrq->min_vruntime - new_cfsrq->min_vruntime; @@ -2375,6 +2377,7 @@ static void __sched_fork(struct task_struct *p) p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; + p->se.nr_migrations = 0; p->se.last_wakeup = 0; p->se.avg_overlap = 0; -- cgit v1.2.3-70-g09d2 From 8fb9331391af95ca1f4e5c0a0da8120b13cbae01 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:04:16 +0100 Subject: perfcounters: remove warnings Impact: remove debug checks Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 7 ------- include/linux/perf_counter.h | 4 ---- kernel/perf_counter.c | 8 -------- 3 files changed, 19 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index bdbdb56eaa3..89fad5d4fb3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -64,7 +64,6 @@ x86_perf_counter_update(struct perf_counter *counter, { u64 prev_raw_count, new_raw_count, delta; - WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE); /* * Careful: an NMI might modify the previous counter value. * @@ -89,7 +88,6 @@ again: * of the count, so we do that by clipping the delta to 32 bits: */ delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); - WARN_ON_ONCE((int)delta < 0); atomic64_add(delta, &counter->count); atomic64_sub(delta, &hwc->period_left); @@ -193,7 +191,6 @@ __x86_perf_counter_disable(struct perf_counter *counter, int err; err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); - WARN_ON_ONCE(err); } static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); @@ -209,8 +206,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter, s32 left = atomic64_read(&hwc->period_left); s32 period = hwc->irq_period; - WARN_ON_ONCE(period <= 0); - /* * If we are way outside a reasoable range then just skip forward: */ @@ -224,8 +219,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter, atomic64_set(&hwc->period_left, left); } - WARN_ON_ONCE(left <= 0); - per_cpu(prev_left[idx], smp_processor_id()) = left; /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f30486fc55d..d038450de87 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -218,8 +218,6 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern void -perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); @@ -237,8 +235,6 @@ extern int perf_counter_task_enable(void); #else static inline void -perf_counter_show(struct perf_counter *counter, char *str, int trace) { } -static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 5431e790b5d..aab6c123b02 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -861,8 +861,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -906,8 +904,6 @@ static void page_faults_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -954,8 +950,6 @@ static void context_switches_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -1000,8 +994,6 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } -- cgit v1.2.3-70-g09d2 From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:09:13 +0100 Subject: x86, perfcounters: prepare for fixed-mode PMCs Impact: refactor the x86 code for fixed-mode PMCs Extend the data structures and rename the existing facilities to allow for a 'generic' versus 'fixed' counter distinction. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 11 ++++++++ arch/x86/kernel/cpu/perf_counter.c | 53 ++++++++++++++++++------------------- include/linux/perf_counter.h | 1 + 3 files changed, 38 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 9dadce1124e..dd5a4a559e2 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -1,6 +1,13 @@ #ifndef _ASM_X86_PERF_COUNTER_H #define _ASM_X86_PERF_COUNTER_H +/* + * Performance counter hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -20,6 +27,10 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ union cpuid10_eax { struct { unsigned int version_id:8; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a4a3a09a654..fc3af868823 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -27,13 +27,12 @@ static bool perf_counters_initialized __read_mostly; static int nr_hw_counters __read_mostly; static u32 perf_counter_mask __read_mostly; -/* No support for fixed function counters yet */ - -#define MAX_HW_COUNTERS 8 - struct cpu_hw_counters { - struct perf_counter *counters[MAX_HW_COUNTERS]; - unsigned long used[BITS_TO_LONGS(MAX_HW_COUNTERS)]; + struct perf_counter *generic[X86_PMC_MAX_GENERIC]; + unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; + + struct perf_counter *fixed[X86_PMC_MAX_FIXED]; + unsigned long used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)]; }; /* @@ -185,7 +184,7 @@ void hw_perf_restore(u64 ctrl) EXPORT_SYMBOL_GPL(hw_perf_restore); static inline void -__x86_perf_counter_disable(struct perf_counter *counter, +__pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { int err; @@ -193,7 +192,7 @@ __x86_perf_counter_disable(struct perf_counter *counter, err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } -static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); +static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -231,7 +230,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter, } static void -__x86_perf_counter_enable(struct perf_counter *counter, +__pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, @@ -241,7 +240,7 @@ __x86_perf_counter_enable(struct perf_counter *counter, /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static void x86_perf_counter_enable(struct perf_counter *counter) +static void pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -256,12 +255,12 @@ static void x86_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); - cpuc->counters[idx] = counter; + cpuc->generic[idx] = counter; __hw_perf_counter_set_period(counter, hwc, idx); - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } void perf_counter_print_debug(void) @@ -301,16 +300,16 @@ void perf_counter_print_debug(void) local_irq_enable(); } -static void x86_perf_counter_disable(struct perf_counter *counter) +static void pmc_generic_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); - cpuc->counters[idx] = NULL; + cpuc->generic[idx] = NULL; /* * Drain the remaining delta count out of a counter @@ -349,7 +348,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } static void @@ -392,7 +391,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; clear_bit(bit, (unsigned long *) &status); if (!counter) @@ -412,7 +411,7 @@ again: } /* * From NMI context we cannot call into the scheduler to - * do a task wakeup - but we mark these counters as + * do a task wakeup - but we mark these generic as * wakeup_pending and initate a wakeup callback: */ if (nmi) { @@ -462,7 +461,7 @@ void perf_counter_notify(struct pt_regs *regs) cpuc = &per_cpu(cpu_hw_counters, cpu); for_each_bit(bit, cpuc->used, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; if (!counter) continue; @@ -539,10 +538,10 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... version: %d\n", eax.split.version_id); printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); nr_hw_counters = eax.split.num_counters; - if (nr_hw_counters > MAX_HW_COUNTERS) { - nr_hw_counters = MAX_HW_COUNTERS; + if (nr_hw_counters > X86_PMC_MAX_GENERIC) { + nr_hw_counters = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_hw_counters, MAX_HW_COUNTERS); + nr_hw_counters, X86_PMC_MAX_GENERIC); } perf_counter_mask = (1 << nr_hw_counters) - 1; perf_max_counters = nr_hw_counters; @@ -556,15 +555,15 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void x86_perf_counter_read(struct perf_counter *counter) +static void pmc_generic_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .hw_perf_counter_enable = x86_perf_counter_enable, - .hw_perf_counter_disable = x86_perf_counter_disable, - .hw_perf_counter_read = x86_perf_counter_read, + .hw_perf_counter_enable = pmc_generic_enable, + .hw_perf_counter_disable = pmc_generic_disable, + .hw_perf_counter_read = pmc_generic_read, }; const struct hw_perf_counter_ops * diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d038450de87..984da540224 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #include #include -- cgit v1.2.3-70-g09d2 From 7671581f1666ef4b54a1c1e598c51ac44c060a9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:20:28 +0100 Subject: perfcounters: hw ops rename Impact: rename field names Shorten them. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 6 ++--- include/linux/perf_counter.h | 6 ++--- kernel/perf_counter.c | 50 +++++++++++++++++++------------------- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 358af526640..b6755712142 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -577,9 +577,9 @@ static void pmc_generic_read(struct perf_counter *counter) } static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .hw_perf_counter_enable = pmc_generic_enable, - .hw_perf_counter_disable = pmc_generic_disable, - .hw_perf_counter_read = pmc_generic_read, + .enable = pmc_generic_enable, + .disable = pmc_generic_disable, + .read = pmc_generic_read, }; const struct hw_perf_counter_ops * diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 984da540224..48f76d2e54c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,9 +128,9 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*hw_perf_counter_enable) (struct perf_counter *counter); - void (*hw_perf_counter_disable) (struct perf_counter *counter); - void (*hw_perf_counter_read) (struct perf_counter *counter); + void (*enable) (struct perf_counter *counter); + void (*disable) (struct perf_counter *counter); + void (*read) (struct perf_counter *counter); }; /** diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f8a4d9a5d5d..961d651aa57 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -109,7 +109,7 @@ static void __perf_counter_remove_from_context(void *info) spin_lock_irqsave(&ctx->lock, flags); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->hw_ops->hw_perf_counter_disable(counter); + counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; ctx->nr_active--; cpuctx->active_oncpu--; @@ -226,7 +226,7 @@ static void __perf_install_in_context(void *info) counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; - counter->hw_ops->hw_perf_counter_enable(counter); + counter->hw_ops->enable(counter); } if (!ctx->task && cpuctx->max_pertask) @@ -297,7 +297,7 @@ counter_sched_out(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; - counter->hw_ops->hw_perf_counter_disable(counter); + counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; counter->oncpu = -1; @@ -327,7 +327,7 @@ group_sched_out(struct perf_counter *group_counter, * * We stop each counter and update the counter value in counter->count. * - * This does not protect us against NMI, but hw_perf_counter_disable() + * This does not protect us against NMI, but disable() * sets the disabled bit in the control field of counter _before_ * accessing the counter control register. If a NMI hits, then it will * not restart the counter. @@ -359,7 +359,7 @@ counter_sched_in(struct perf_counter *counter, if (counter->state == PERF_COUNTER_STATE_OFF) return; - counter->hw_ops->hw_perf_counter_enable(counter); + counter->hw_ops->enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -395,7 +395,7 @@ group_sched_in(struct perf_counter *group_counter, * * We restore the counter value and then enable it. * - * This does not protect us against NMI, but hw_perf_counter_enable() + * This does not protect us against NMI, but enable() * sets the enabled bit in the control field of counter _before_ * accessing the counter control register. If a NMI hits, then it will * keep the counter running. @@ -537,11 +537,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Cross CPU call to read the hardware counter */ -static void __hw_perf_counter_read(void *info) +static void __read(void *info) { struct perf_counter *counter = info; - counter->hw_ops->hw_perf_counter_read(counter); + counter->hw_ops->read(counter); } static u64 perf_counter_read(struct perf_counter *counter) @@ -552,7 +552,7 @@ static u64 perf_counter_read(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, - __hw_perf_counter_read, counter, 1); + __read, counter, 1); } return atomic64_read(&counter->count); @@ -855,9 +855,9 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_cpu_clock = { - .hw_perf_counter_enable = cpu_clock_perf_counter_enable, - .hw_perf_counter_disable = cpu_clock_perf_counter_disable, - .hw_perf_counter_read = cpu_clock_perf_counter_read, + .enable = cpu_clock_perf_counter_enable, + .disable = cpu_clock_perf_counter_disable, + .read = cpu_clock_perf_counter_read, }; static void task_clock_perf_counter_update(struct perf_counter *counter) @@ -891,9 +891,9 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_task_clock = { - .hw_perf_counter_enable = task_clock_perf_counter_enable, - .hw_perf_counter_disable = task_clock_perf_counter_disable, - .hw_perf_counter_read = task_clock_perf_counter_read, + .enable = task_clock_perf_counter_enable, + .disable = task_clock_perf_counter_disable, + .read = task_clock_perf_counter_read, }; static u64 get_page_faults(void) @@ -937,9 +937,9 @@ static void page_faults_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_page_faults = { - .hw_perf_counter_enable = page_faults_perf_counter_enable, - .hw_perf_counter_disable = page_faults_perf_counter_disable, - .hw_perf_counter_read = page_faults_perf_counter_read, + .enable = page_faults_perf_counter_enable, + .disable = page_faults_perf_counter_disable, + .read = page_faults_perf_counter_read, }; static u64 get_context_switches(void) @@ -983,9 +983,9 @@ static void context_switches_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_context_switches = { - .hw_perf_counter_enable = context_switches_perf_counter_enable, - .hw_perf_counter_disable = context_switches_perf_counter_disable, - .hw_perf_counter_read = context_switches_perf_counter_read, + .enable = context_switches_perf_counter_enable, + .disable = context_switches_perf_counter_disable, + .read = context_switches_perf_counter_read, }; static inline u64 get_cpu_migrations(void) @@ -1027,9 +1027,9 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { - .hw_perf_counter_enable = cpu_migrations_perf_counter_enable, - .hw_perf_counter_disable = cpu_migrations_perf_counter_disable, - .hw_perf_counter_read = cpu_migrations_perf_counter_read, + .enable = cpu_migrations_perf_counter_enable, + .disable = cpu_migrations_perf_counter_disable, + .read = cpu_migrations_perf_counter_read, }; static const struct hw_perf_counter_ops * @@ -1283,7 +1283,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - child_counter->hw_ops->hw_perf_counter_disable(child_counter); + child_counter->hw_ops->disable(child_counter); child_counter->state = PERF_COUNTER_STATE_INACTIVE; child_counter->oncpu = -1; -- cgit v1.2.3-70-g09d2 From aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:10:57 +0100 Subject: perfcounters: fix task clock counter Impact: fix per task clock counter precision Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 8 ++++++ kernel/exit.c | 17 +++++++---- kernel/perf_counter.c | 70 ++++++++++++++++++++++++++++++++++----------- kernel/sched.c | 49 +++++++++++++++++++++++++++++-- 4 files changed, 120 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4a145caeee0..1b2e3242497 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } + +/* + * Lock/unlock the current runqueue - to extract task statistics: + */ +extern void curr_rq_lock_irq_save(unsigned long *flags); +extern void curr_rq_unlock_irq_restore(unsigned long *flags); +extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); + extern void account_user_time(struct task_struct *, cputime_t); extern void account_user_time_scaled(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); diff --git a/kernel/exit.c b/kernel/exit.c index d336c90a5f1..244edfd9686 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead) forget_original_parent(tsk); exit_task_namespaces(tsk); + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_counter_exit_task(tsk); + write_lock_irq(&tasklist_lock); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); @@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code) mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif - /* - * These must happen late, after the PID is not - * hashed anymore, but still at a point that may sleep: - */ - perf_counter_exit_task(tsk); #ifdef CONFIG_FUTEX if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); @@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); + /* + * These must happen late, after the PID is not + * hashed anymore, but still at a point that may sleep: + */ + perf_counter_exit_task(tsk); + preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 961d651aa57..f1110ac1267 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* @@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock_irqsave(&ctx->lock, flags); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->disable(counter); @@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); } @@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock_irqsave(&ctx->lock, flags); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); /* * Protect the list operation against NMI by disabling the @@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info) if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); } /* @@ -438,15 +443,19 @@ int perf_counter_task_disable(void) struct task_struct *curr = current; struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + unsigned long flags; u64 perf_flags; int cpu; if (likely(!ctx->nr_counters)) return 0; - local_irq_disable(); + curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); + /* force the update of the task clock: */ + __task_delta_exec(curr, 1); + perf_counter_task_sched_out(curr, cpu); spin_lock(&ctx->lock); @@ -463,7 +472,7 @@ int perf_counter_task_disable(void) spin_unlock(&ctx->lock); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); return 0; } @@ -473,15 +482,19 @@ int perf_counter_task_enable(void) struct task_struct *curr = current; struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + unsigned long flags; u64 perf_flags; int cpu; if (likely(!ctx->nr_counters)) return 0; - local_irq_disable(); + curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); + /* force the update of the task clock: */ + __task_delta_exec(curr, 1); + spin_lock(&ctx->lock); /* @@ -493,6 +506,7 @@ int perf_counter_task_enable(void) if (counter->state != PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -500,7 +514,7 @@ int perf_counter_task_enable(void) perf_counter_task_sched_in(curr, cpu); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); return 0; } @@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) static void __read(void *info) { struct perf_counter *counter = info; + unsigned long flags; + curr_rq_lock_irq_save(&flags); counter->hw_ops->read(counter); + curr_rq_unlock_irq_restore(&flags); } static u64 perf_counter_read(struct perf_counter *counter) @@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .read = cpu_clock_perf_counter_read, }; -static void task_clock_perf_counter_update(struct perf_counter *counter) +/* + * Called from within the scheduler: + */ +static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) { - u64 prev, now; + struct task_struct *curr = counter->task; + u64 delta; + + WARN_ON_ONCE(counter->task != current); + + delta = __task_delta_exec(curr, update); + + return curr->se.sum_exec_runtime + delta; +} + +static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) +{ + u64 prev; s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = current->se.sum_exec_runtime; atomic64_set(&counter->hw.prev_count, now); @@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter) { - task_clock_perf_counter_update(counter); + u64 now = task_clock_perf_counter_val(counter, 1); + + task_clock_perf_counter_update(counter, now); } static void task_clock_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime); + u64 now = task_clock_perf_counter_val(counter, 0); + + atomic64_set(&counter->hw.prev_count, now); } static void task_clock_perf_counter_disable(struct perf_counter *counter) { - task_clock_perf_counter_update(counter); + u64 now = task_clock_perf_counter_val(counter, 0); + + task_clock_perf_counter_update(counter, now); } static const struct hw_perf_counter_ops perf_ops_task_clock = { @@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child, { struct perf_counter *parent_counter; u64 parent_val, child_val; + unsigned long flags; u64 perf_flags; /* @@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child, * Be careful about zapping the list - IRQ/NMI context * could still be processing it: */ - local_irq_disable(); + curr_rq_lock_irq_save(&flags); perf_flags = hw_perf_save_disable(); if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { @@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child, list_del_init(&child_counter->list_entry); hw_perf_restore(perf_flags); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); parent_counter = child_counter->parent; /* diff --git a/kernel/sched.c b/kernel/sched.c index 382cfdb5e38..4d84ff4c877 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -static inline void update_rq_clock(struct rq *rq) +inline void update_rq_clock(struct rq *rq) { rq->clock = sched_clock_cpu(cpu_of(rq)); } @@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } +void curr_rq_lock_irq_save(unsigned long *flags) + __acquires(rq->lock) +{ + struct rq *rq; + + local_irq_save(*flags); + rq = cpu_rq(smp_processor_id()); + spin_lock(&rq->lock); +} + +void curr_rq_unlock_irq_restore(unsigned long *flags) + __releases(rq->lock) +{ + struct rq *rq; + + rq = cpu_rq(smp_processor_id()); + spin_unlock(&rq->lock); + local_irq_restore(*flags); +} + void task_rq_unlock_wait(struct task_struct *p) { struct rq *rq = task_rq(p); @@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { fire_sched_out_preempt_notifiers(prev, next); - perf_counter_task_sched_out(prev, cpu_of(rq)); prepare_lock_switch(rq, next); prepare_arch_switch(next); } @@ -4089,6 +4108,29 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); +/* + * Return any ns on the sched_clock that have not yet been banked in + * @p in case that task is currently running. + */ +unsigned long long __task_delta_exec(struct task_struct *p, int update) +{ + s64 delta_exec; + struct rq *rq; + + rq = task_rq(p); + WARN_ON_ONCE(!runqueue_is_locked()); + WARN_ON_ONCE(!task_current(rq, p)); + + if (update) + update_rq_clock(rq); + + delta_exec = rq->clock - p->se.exec_start; + + WARN_ON_ONCE(delta_exec < 0); + + return delta_exec; +} + /* * Return any ns on the sched_clock that have not yet been banked in * @p in case that task is currently running. @@ -4316,13 +4358,13 @@ void scheduler_tick(void) update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); + perf_counter_task_tick(curr, cpu); spin_unlock(&rq->lock); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif - perf_counter_task_tick(curr, cpu); } #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ @@ -4512,6 +4554,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); + perf_counter_task_sched_out(prev, cpu); rq->nr_switches++; rq->curr = next; -- cgit v1.2.3-70-g09d2 From eef6cbf5844c620d9db9be99e4908cdf92492fb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 10:20:42 +0100 Subject: perfcounters: pull inherited counters Change counter inheritance from a 'push' to a 'pull' model: instead of child tasks pushing their final counts to the parent, reuse the wait4 infrastructure to pull counters as child tasks are exit-processed, much like how cutime/cstime is collected. Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 9 +++++++++ kernel/exit.c | 21 +++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e..54fa2fa2c8e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,14 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_ctx.counter_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -180,6 +188,7 @@ extern struct group_info init_groups; INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + INIT_PERF_COUNTERS(tsk) \ } diff --git a/kernel/exit.c b/kernel/exit.c index 244edfd9686..101b7eeff44 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -153,6 +153,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); +#ifdef CONFIG_PERF_COUNTERS + WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); +#endif trace_sched_process_free(tsk); put_task_struct(tsk); } @@ -922,12 +925,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) forget_original_parent(tsk); exit_task_namespaces(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - */ - perf_counter_exit_task(tsk); - write_lock_irq(&tasklist_lock); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); @@ -1122,12 +1119,6 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - /* - * These must happen late, after the PID is not - * hashed anymore, but still at a point that may sleep: - */ - perf_counter_exit_task(tsk); - preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; @@ -1371,6 +1362,12 @@ static int wait_task_zombie(struct task_struct *p, int options, */ read_unlock(&tasklist_lock); + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_counter_exit_task(p); + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; -- cgit v1.2.3-70-g09d2 From 78b6084c907cea15bb40a564b974e072f5163781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 15:07:49 +0100 Subject: perfcounters: fix init context lock Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 54fa2fa2c8e..467cff545c3 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -116,7 +116,9 @@ extern struct group_info init_groups; #ifdef CONFIG_PERF_COUNTERS # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.lock = \ + __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else # define INIT_PERF_COUNTERS(tsk) #endif -- cgit v1.2.3-70-g09d2 From 95cdd2e7851cce79ab839cb0b3cbe68d7911d0f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 13:50:42 +0100 Subject: perfcounters: enable lowlevel pmc code to schedule counters Allow lowlevel ->enable() op to return an error if a counter can not be added. This can be used to handle counter constraints. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 6 +++- include/linux/perf_counter.h | 2 +- kernel/perf_counter.c | 62 +++++++++++++++++++++++++++----------- 3 files changed, 51 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b6755712142..74090a393a7 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -244,7 +244,7 @@ static int fixed_mode_idx(struct hw_perf_counter *hwc) /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static void pmc_generic_enable(struct perf_counter *counter) +static int pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -253,6 +253,8 @@ static void pmc_generic_enable(struct perf_counter *counter) /* Try to get the previous counter again */ if (test_and_set_bit(idx, cpuc->used)) { idx = find_first_zero_bit(cpuc->used, nr_counters_generic); + if (idx == nr_counters_generic) + return -EAGAIN; set_bit(idx, cpuc->used); hwc->idx = idx; } @@ -265,6 +267,8 @@ static void pmc_generic_enable(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); + + return 0; } void perf_counter_print_debug(void) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48f76d2e54c..53af11d3767 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,7 +128,7 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*enable) (struct perf_counter *counter); + int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f1110ac1267..2e73929a695 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -355,21 +355,25 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) cpuctx->task_ctx = NULL; } -static void +static int counter_sched_in(struct perf_counter *counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu) { if (counter->state == PERF_COUNTER_STATE_OFF) - return; + return 0; + + if (counter->hw_ops->enable(counter)) + return -EAGAIN; - counter->hw_ops->enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ cpuctx->active_oncpu++; ctx->nr_active++; + + return 0; } static int @@ -378,20 +382,38 @@ group_sched_in(struct perf_counter *group_counter, struct perf_counter_context *ctx, int cpu) { - struct perf_counter *counter; - int was_group = 0; + struct perf_counter *counter, *partial_group; + int ret = 0; - counter_sched_in(group_counter, cpuctx, ctx, cpu); + if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) + return -EAGAIN; /* * Schedule in siblings as one group (if any): */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - counter_sched_in(counter, cpuctx, ctx, cpu); - was_group = 1; + if (counter_sched_in(counter, cpuctx, ctx, cpu)) { + partial_group = counter; + goto group_error; + } + ret = -EAGAIN; } - return was_group; + return ret; + +group_error: + /* + * Groups can be scheduled in as one unit only, so undo any + * partial group before returning: + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { + if (counter == partial_group) + break; + counter_sched_out(counter, cpuctx, ctx); + } + counter_sched_out(group_counter, cpuctx, ctx); + + return -EAGAIN; } /* @@ -416,9 +438,6 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) spin_lock(&ctx->lock); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (ctx->nr_active == cpuctx->max_pertask) - break; - /* * Listen to the 'cpu' scheduling filter constraint * of counters: @@ -856,8 +875,9 @@ static const struct file_operations perf_fops = { .poll = perf_poll, }; -static void cpu_clock_perf_counter_enable(struct perf_counter *counter) +static int cpu_clock_perf_counter_enable(struct perf_counter *counter) { + return 0; } static void cpu_clock_perf_counter_disable(struct perf_counter *counter) @@ -913,11 +933,13 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) task_clock_perf_counter_update(counter, now); } -static void task_clock_perf_counter_enable(struct perf_counter *counter) +static int task_clock_perf_counter_enable(struct perf_counter *counter) { u64 now = task_clock_perf_counter_val(counter, 0); atomic64_set(&counter->hw.prev_count, now); + + return 0; } static void task_clock_perf_counter_disable(struct perf_counter *counter) @@ -960,12 +982,14 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) page_faults_perf_counter_update(counter); } -static void page_faults_perf_counter_enable(struct perf_counter *counter) +static int page_faults_perf_counter_enable(struct perf_counter *counter) { /* * page-faults is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void page_faults_perf_counter_disable(struct perf_counter *counter) @@ -1006,12 +1030,14 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) context_switches_perf_counter_update(counter); } -static void context_switches_perf_counter_enable(struct perf_counter *counter) +static int context_switches_perf_counter_enable(struct perf_counter *counter) { /* * ->nvcsw + curr->nivcsw is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void context_switches_perf_counter_disable(struct perf_counter *counter) @@ -1050,12 +1076,14 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) cpu_migrations_perf_counter_update(counter); } -static void cpu_migrations_perf_counter_enable(struct perf_counter *counter) +static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { /* * se.nr_migrations is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) -- cgit v1.2.3-70-g09d2 From 8fe91e61cdc407c7556d3cd71cf20141a25bbcea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:29:25 +0100 Subject: perfcounters: remove ->nr_inherited Impact: remove dead code nr_inherited was not maintained correctly (not decremented) - and also not used - remove it. Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - kernel/perf_counter.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 53af11d3767..1ea08e9f31c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -164,7 +164,6 @@ struct perf_counter { struct task_struct *task; struct file *filp; - unsigned int nr_inherited; struct perf_counter *parent; /* * Protect attach/detach: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2e73929a695..48e1dbcdc1c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1308,7 +1308,6 @@ inherit_counter(struct perf_counter *parent_counter, child_ctx->nr_counters++; child_counter->parent = parent_counter; - parent_counter->nr_inherited++; /* * inherit into child's child as well: */ -- cgit v1.2.3-70-g09d2 From f650a672359819454c3d8d4135ecd1558cde0b24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:17:29 +0100 Subject: perfcounters: add PERF_COUNT_BUS_CYCLES Generalize "bus cycles" hw events - and map them to CPU_CLK_Unhalted.Ref on x86. (which is a good enough approximation) Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 3 ++- include/linux/perf_counter.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f3359c2b391..86b2fdd344a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -41,12 +41,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); static const int intel_perfmon_event_map[] = { - [PERF_COUNT_CYCLES] = 0x003c, + [PERF_COUNT_CPU_CYCLES] = 0x003c, [PERF_COUNT_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, [PERF_COUNT_CACHE_MISSES] = 0x412e, [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_BUS_CYCLES] = 0x013c, }; static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1ea08e9f31c..ec77d1643d3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -36,14 +36,15 @@ enum hw_event_types { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CYCLES = 0, + PERF_COUNT_CPU_CYCLES = 0, PERF_COUNT_INSTRUCTIONS = 1, PERF_COUNT_CACHE_REFERENCES = 2, PERF_COUNT_CACHE_MISSES = 3, PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 6, + PERF_HW_EVENTS_MAX = 7, /* * Special "software" counters provided by the kernel, even if -- cgit v1.2.3-70-g09d2 From e44aef58ecfbe061eb4c53b939bcc35fb1bee82d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 25 Dec 2008 09:02:11 +0100 Subject: perfcounters: include asm/perf_counter.h only if CONFIG_PERF_COUNTERS=y Impact: build fix on ia64 KOSAKI Motohiro reported that -tip doesnt build on ia64 because asm/perf_counter.h only exists on x86 for now. Fix it. Reported-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Acked-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ec77d1643d3..cc3a75a239a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,7 +14,10 @@ #define _LINUX_PERF_COUNTER_H #include -#include + +#ifdef CONFIG_PERF_COUNTERS +# include +#endif #include #include -- cgit v1.2.3-70-g09d2 From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:43:42 +1100 Subject: perf_counter: Add optional hw_perf_group_sched_in arch function Impact: extend perf_counter infrastructure This adds an optional hw_perf_group_sched_in() arch function that enables a whole group of counters in one go. It returns 1 if it added the group successfully, 0 if it did nothing (and therefore the core needs to add the counters individually), or a negative number if an error occurred. It should add all the counters and enable any software counters in the group, or else add none of them and return an error. There are a couple of related changes/improvements in the group handling here: * As an optimization, group_sched_out() and group_sched_in() now check the state of the group leader, and do nothing if the leader is not active or disabled. * We now call hw_perf_save_disable/hw_perf_restore around the complete set of counter enable/disable calls in __perf_counter_sched_in/out, to give the arch code the opportunity to defer updating the hardware state until the hw_perf_restore call if it wants. * We no longer stop adding groups after we get to a group that has more than one counter. We will ultimately add an option for a group to be exclusive. The current code doesn't really implement exclusive groups anyway, since a group could end up going on with other counters that get added before it. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 3 +++ kernel/perf_counter.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index cc3a75a239a..b21d1ea4c05 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -236,6 +236,9 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu); #else static inline void diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b7a027a2ef0..9ad11e44d9a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -47,6 +47,12 @@ hw_perf_counter_init(struct perf_counter *counter) u64 __weak hw_perf_save_disable(void) { return 0; } void __weak hw_perf_restore(u64 ctrl) { barrier(); } void __weak hw_perf_counter_setup(void) { barrier(); } +int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu) +{ + return 0; +} static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) @@ -341,6 +347,9 @@ group_sched_out(struct perf_counter *group_counter, { struct perf_counter *counter; + if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + counter_sched_out(group_counter, cpuctx, ctx); /* @@ -354,15 +363,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx) { struct perf_counter *counter; + u64 flags; if (likely(!ctx->nr_counters)) return; spin_lock(&ctx->lock); + flags = hw_perf_save_disable(); if (ctx->nr_active) { list_for_each_entry(counter, &ctx->counter_list, list_entry) group_sched_out(counter, cpuctx, ctx); } + hw_perf_restore(flags); spin_unlock(&ctx->lock); } @@ -402,7 +414,14 @@ group_sched_in(struct perf_counter *group_counter, int cpu) { struct perf_counter *counter, *partial_group; - int ret = 0; + int ret; + + if (group_counter->state == PERF_COUNTER_STATE_OFF) + return 0; + + ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); + if (ret) + return ret < 0 ? ret : 0; if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) return -EAGAIN; @@ -415,10 +434,9 @@ group_sched_in(struct perf_counter *group_counter, partial_group = counter; goto group_error; } - ret = -EAGAIN; } - return ret; + return 0; group_error: /* @@ -440,11 +458,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) { struct perf_counter *counter; + u64 flags; if (likely(!ctx->nr_counters)) return; spin_lock(&ctx->lock); + flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { /* * Listen to the 'cpu' scheduling filter constraint @@ -454,12 +474,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, continue; /* - * If we scheduled in a group atomically and - * exclusively, break out: + * If we scheduled in a group atomically and exclusively, + * or if this group can't go on, break out: */ if (group_sched_in(counter, cpuctx, ctx, cpu)) break; } + hw_perf_restore(flags); spin_unlock(&ctx->lock); } -- cgit v1.2.3-70-g09d2 From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 21:00:30 +1100 Subject: perf_counter: Add support for pinned and exclusive counter groups Impact: New perf_counter features A pinned counter group is one that the user wants to have on the CPU whenever possible, i.e. whenever the associated task is running, for a per-task group, or always for a per-cpu group. If the system cannot satisfy that, it puts the group into an error state where it is not scheduled any more and reads from it return EOF (i.e. 0 bytes read). The group can be released from error state and made readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE). When we have finer-grained enable/disable controls on counters we'll be able to reset the error state on individual groups. An exclusive group is one that the user wants to be the only group using the CPU performance monitor hardware whenever it is on. The counter group scheduler will not schedule an exclusive group if there are already other groups on the CPU and will not schedule other groups onto the CPU if there is an exclusive group scheduled (that statement does not apply to groups containing only software counters, which can always go on and which do not prevent an exclusive group from going on). With an exclusive group, we will be able to let users program PMU registers at a low level without the concern that those settings will perturb other measurements. Along the way this reorganizes things a little: - is_software_counter() is moved to perf_counter.h. - cpuctx->active_oncpu now records the number of hardware counters on the CPU, i.e. it now excludes software counters. Nothing was reading cpuctx->active_oncpu before, so this change is harmless. - A new cpuctx->exclusive field records whether we currently have an exclusive group on the CPU. - counter_sched_out moves higher up in perf_counter.c and gets called from __perf_counter_remove_from_context and __perf_counter_exit_task, where we used to have essentially the same code. - __perf_counter_sched_in now goes through the counter list twice, doing the pinned counters in the first loop and the non-pinned counters in the second loop, in order to give the pinned counters the best chance to be scheduled in. Note that only a group leader can be exclusive or pinned, and that attribute applies to the whole group. This avoids some awkwardness in some corner cases (e.g. where a group leader is closed and the other group members get added to the context list). If we want to relax that restriction later, we can, and it is easier to relax a restriction than to apply a new one. This doesn't yet handle the case where a pinned counter is inherited and goes into error state in the child - the error state is not propagated up to the parent when the child exits, and arguably it should. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 10 +- include/linux/perf_counter.h | 15 ++- kernel/perf_counter.c | 226 +++++++++++++++++++++++++------------ 3 files changed, 169 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 85ad25923c2..5b0211348c7 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -35,14 +35,6 @@ void perf_counter_print_debug(void) { } -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return !counter->hw_event.raw && counter->hw_event.type < 0; -} - /* * Read one performance monitor counter (PMC). */ @@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, */ for (i = n0; i < n0 + n; ++i) cpuhw->counter[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; n = 1; counter_sched_in(group_leader, cpu); list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { @@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, ++n; } } - cpuctx->active_oncpu += n; ctx->nr_active += n; return 1; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b21d1ea4c05..7ab8e5f96f5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -86,7 +86,10 @@ struct perf_counter_hw_event { nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ - __reserved_1 : 28; + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only counter on PMU */ + + __reserved_1 : 26; u64 __reserved_2; }; @@ -141,6 +144,7 @@ struct hw_perf_counter_ops { * enum perf_counter_active_state - the states of a counter */ enum perf_counter_active_state { + PERF_COUNTER_STATE_ERROR = -2, PERF_COUNTER_STATE_OFF = -1, PERF_COUNTER_STATE_INACTIVE = 0, PERF_COUNTER_STATE_ACTIVE = 1, @@ -214,6 +218,7 @@ struct perf_cpu_context { struct perf_counter_context *task_ctx; int active_oncpu; int max_pertask; + int exclusive; }; /* @@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 52f2f526248..faf671b2956 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) } } +static void +counter_sched_out(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); + counter->oncpu = -1; + + if (!is_software_counter(counter)) + cpuctx->active_oncpu--; + ctx->nr_active--; + if (counter->hw_event.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + /* * Cross CPU call to remove a performance counter * @@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->hw_ops->disable(counter); - ctx->nr_active--; - cpuctx->active_oncpu--; - counter->task = NULL; - counter->oncpu = -1; - } + counter_sched_out(counter, cpuctx, ctx); + + counter->task = NULL; ctx->nr_counters--; /* @@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - if (counter->state == PERF_COUNTER_STATE_OFF) + if (counter->state <= PERF_COUNTER_STATE_OFF) return 0; counter->state = PERF_COUNTER_STATE_ACTIVE; @@ -223,12 +237,63 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } - cpuctx->active_oncpu++; + if (!is_software_counter(counter)) + cpuctx->active_oncpu++; ctx->nr_active++; + if (counter->hw_event.exclusive) + cpuctx->exclusive = 1; + return 0; } +/* + * Return 1 for a group consisting entirely of software counters, + * 0 if the group contains any hardware counters. + */ +static int is_software_only_group(struct perf_counter *leader) +{ + struct perf_counter *counter; + + if (!is_software_counter(leader)) + return 0; + list_for_each_entry(counter, &leader->sibling_list, list_entry) + if (!is_software_counter(counter)) + return 0; + return 1; +} + +/* + * Work out whether we can put this counter group on the CPU now. + */ +static int group_can_go_on(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + int can_add_hw) +{ + /* + * Groups consisting entirely of software counters can always go on. + */ + if (is_software_only_group(counter)) + return 1; + /* + * If an exclusive group is already on, no other hardware + * counters can go on. + */ + if (cpuctx->exclusive) + return 0; + /* + * If this group is exclusive and there are already + * counters on the CPU, it can't go on. + */ + if (counter->hw_event.exclusive && cpuctx->active_oncpu) + return 0; + /* + * Otherwise, try to add it if all previous groups were able + * to go on. + */ + return can_add_hw; +} + /* * Cross CPU call to install and enable a performance counter */ @@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info) int cpu = smp_processor_id(); unsigned long flags; u64 perf_flags; + int err; /* * If this is a task context, we need to check whether it is @@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info) list_add_counter(counter, ctx); ctx->nr_counters++; - counter_sched_in(counter, cpuctx, ctx, cpu); + /* + * An exclusive counter can't go on if there are already active + * hardware counters, and no hardware counter can go on if there + * is already an exclusive counter on. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE && + !group_can_go_on(counter, cpuctx, 1)) + err = -EEXIST; + else + err = counter_sched_in(counter, cpuctx, ctx, cpu); + + if (err && counter->hw_event.pinned) + counter->state = PERF_COUNTER_STATE_ERROR; - if (!ctx->task && cpuctx->max_pertask) + if (!err && !ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; hw_perf_restore(perf_flags); @@ -326,22 +404,6 @@ retry: spin_unlock_irq(&ctx->lock); } -static void -counter_sched_out(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->hw_ops->disable(counter); - counter->oncpu = -1; - - cpuctx->active_oncpu--; - ctx->nr_active--; -} - static void group_sched_out(struct perf_counter *group_counter, struct perf_cpu_context *cpuctx, @@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter, */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) counter_sched_out(counter, cpuctx, ctx); + + if (group_counter->hw_event.exclusive) + cpuctx->exclusive = 0; } void __perf_counter_sched_out(struct perf_counter_context *ctx, @@ -455,30 +520,6 @@ group_error: return -EAGAIN; } -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return !counter->hw_event.raw && counter->hw_event.type < 0; -} - -/* - * Return 1 for a group consisting entirely of software counters, - * 0 if the group contains any hardware counters. - */ -static int is_software_only_group(struct perf_counter *leader) -{ - struct perf_counter *counter; - - if (!is_software_counter(leader)) - return 0; - list_for_each_entry(counter, &leader->sibling_list, list_entry) - if (!is_software_counter(counter)) - return 0; - return 1; -} - static void __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) @@ -492,7 +533,38 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, spin_lock(&ctx->lock); flags = hw_perf_save_disable(); + + /* + * First go through the list and put on any pinned groups + * in order to give them the best chance of going on. + */ + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->state <= PERF_COUNTER_STATE_OFF || + !counter->hw_event.pinned) + continue; + if (counter->cpu != -1 && counter->cpu != cpu) + continue; + + if (group_can_go_on(counter, cpuctx, 1)) + group_sched_in(counter, cpuctx, ctx, cpu); + + /* + * If this pinned group hasn't been scheduled, + * put it in error state. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + counter->state = PERF_COUNTER_STATE_ERROR; + } + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + /* + * Ignore counters in OFF or ERROR state, and + * ignore pinned counters since we did them already. + */ + if (counter->state <= PERF_COUNTER_STATE_OFF || + counter->hw_event.pinned) + continue; + /* * Listen to the 'cpu' scheduling filter constraint * of counters: @@ -500,14 +572,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, if (counter->cpu != -1 && counter->cpu != cpu) continue; - /* - * If we scheduled in a group atomically and exclusively, - * or if this group can't go on, don't add any more - * hardware counters. - */ - if (can_add_hw || is_software_only_group(counter)) + if (group_can_go_on(counter, cpuctx, can_add_hw)) { if (group_sched_in(counter, cpuctx, ctx, cpu)) can_add_hw = 0; + } } hw_perf_restore(flags); spin_unlock(&ctx->lock); @@ -567,8 +635,10 @@ int perf_counter_task_disable(void) */ perf_flags = hw_perf_save_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) - counter->state = PERF_COUNTER_STATE_OFF; + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->state != PERF_COUNTER_STATE_ERROR) + counter->state = PERF_COUNTER_STATE_OFF; + } hw_perf_restore(perf_flags); @@ -607,7 +677,7 @@ int perf_counter_task_enable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_OFF) + if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; counter->hw_event.disabled = 0; @@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (count != sizeof(cntval)) return -EINVAL; + /* + * Return end-of-file for a read on a counter that is in + * error state (i.e. because it was pinned but it couldn't be + * scheduled on to the CPU at some point). + */ + if (counter->state == PERF_COUNTER_STATE_ERROR) + return 0; + mutex_lock(&counter->mutex); cntval = perf_counter_read(counter); mutex_unlock(&counter->mutex); @@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter *counter, { struct perf_data *irqdata, *usrdata; DECLARE_WAITQUEUE(wait, current); - ssize_t res; + ssize_t res, res2; irqdata = counter->irqdata; usrdata = counter->usrdata; @@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter *counter, if (signal_pending(current)) break; + if (counter->state == PERF_COUNTER_STATE_ERROR) + break; + spin_unlock_irq(&counter->waitq.lock); schedule(); spin_lock_irq(&counter->waitq.lock); @@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter *counter, __set_current_state(TASK_RUNNING); spin_unlock_irq(&counter->waitq.lock); - if (usrdata->len + irqdata->len < count) + if (usrdata->len + irqdata->len < count && + counter->state != PERF_COUNTER_STATE_ERROR) return -ERESTARTSYS; read_pending: mutex_lock(&counter->mutex); @@ -925,11 +1007,12 @@ read_pending: /* Switch irq buffer: */ usrdata = perf_switch_irq_data(counter); - if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) { + res2 = perf_copy_usrdata(usrdata, buf + res, count - res); + if (res2 < 0) { if (!res) res = -EFAULT; } else { - res = count; + res += res2; } out: mutex_unlock(&counter->mutex); @@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, */ if (group_leader->ctx != ctx) goto err_put_context; + /* + * Only a group leader can be exclusive or pinned + */ + if (hw_event.exclusive || hw_event.pinned) + goto err_put_context; } ret = -EINVAL; @@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - child_counter->hw_ops->disable(child_counter); - cpuctx->active_oncpu--; - child_ctx->nr_active--; - child_counter->oncpu = -1; - } + counter_sched_out(child_counter, cpuctx, child_ctx); list_del_init(&child_counter->list_entry); -- cgit v1.2.3-70-g09d2 From d859e29fe34cb833071b20aef860ee94fbad9bb2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 17 Jan 2009 18:10:22 +1100 Subject: perf_counter: Add counter enable/disable ioctls Impact: New perf_counter features This primarily adds a way for perf_counter users to enable and disable counters and groups. Enabling or disabling a counter or group also enables or disables all of the child counters that have been cloned from it to monitor children of the task monitored by the top-level counter. The userspace interface to enable/disable counters is via ioctl on the counter file descriptor. Along the way this extends the code that handles child counters to handle child counter groups properly. A group with multiple counters will be cloned to child tasks if and only if the group leader has the hw_event.inherit bit set - if it is set the whole group is cloned as a group in the child task. In order to be able to enable or disable all child counters of a given top-level counter, we need a way to find them all. Hence I have added a child_list field to struct perf_counter, which is the head of the list of children for a top-level counter, or the link in that list for a child counter. That list is protected by the perf_counter.mutex field. This also adds a mutex to the perf_counter_context struct. Previously the list of counters was protected just by the lock field in the context, which meant that perf_counter_init_task had to take that lock and then take whatever lock/mutex protects the top-level counter's child_list. But the counter enable/disable functions need to take that lock in order to traverse the list, then for each counter take the lock in that counter's context in order to change the counter's state safely, which will lead to a deadlock. To solve this, we now have both a mutex and a spinlock in the context, and taking either is sufficient to ensure the list of counters can't change - you have to take both before changing the list. Now perf_counter_init_task takes the mutex instead of the lock (which incidentally means that inherit_counter can use GFP_KERNEL instead of GFP_ATOMIC) and thus avoids the possible deadlock. Similarly the new enable/disable functions can take the mutex while traversing the list of child counters without incurring a possible deadlock when the counter manipulation code locks the context for a child counter. We also had an misfeature that the first counter added to a context would possibly not go on until the next sched-in, because we were using ctx->nr_active to detect if the context was running on a CPU. But nr_active is the number of active counters, and if that was zero (because the context didn't have any counters yet) it would look like the context wasn't running on a cpu and so the retry code in __perf_install_in_context wouldn't retry. So this adds an 'is_active' field that is set when the context is on a CPU, even if it has no counters. The is_active field is only used for task contexts, not for per-cpu contexts. If we enable a subsidiary counter in a group that is active on a CPU, and the arch code can't enable the counter, then we have to pull the whole group off the CPU. We do this with group_sched_out, which gets moved up in the file so it comes before all its callers. This also adds similar logic to __perf_install_in_context so that the "all on, or none" invariant of groups is preserved when adding a new counter to a group. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 21 +- kernel/perf_counter.c | 455 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 415 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7ab8e5f96f5..33ba9fe0a78 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #ifdef CONFIG_PERF_COUNTERS # include @@ -94,6 +95,12 @@ struct perf_counter_hw_event { u64 __reserved_2; }; +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + /* * Kernel-internal data types: */ @@ -173,8 +180,10 @@ struct perf_counter { struct file *filp; struct perf_counter *parent; + struct list_head child_list; + /* - * Protect attach/detach: + * Protect attach/detach and child_list: */ struct mutex mutex; @@ -199,13 +208,21 @@ struct perf_counter { struct perf_counter_context { #ifdef CONFIG_PERF_COUNTERS /* - * Protect the list of counters: + * Protect the states of the counters in the list, + * nr_active, and the list: */ spinlock_t lock; + /* + * Protect the list of counters. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; struct list_head counter_list; int nr_counters; int nr_active; + int is_active; struct task_struct *task; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index faf671b2956..1ac18daa424 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -112,6 +112,28 @@ counter_sched_out(struct perf_counter *counter, cpuctx->exclusive = 0; } +static void +group_sched_out(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + struct perf_counter *counter; + + if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + + counter_sched_out(group_counter, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_out(counter, cpuctx, ctx); + + if (group_counter->hw_event.exclusive) + cpuctx->exclusive = 0; +} + /* * Cross CPU call to remove a performance counter * @@ -168,7 +190,7 @@ static void __perf_counter_remove_from_context(void *info) /* * Remove the counter from a task's (or a CPU's) list of counters. * - * Must be called with counter->mutex held. + * Must be called with counter->mutex and ctx->mutex held. * * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. @@ -215,6 +237,99 @@ retry: spin_unlock_irq(&ctx->lock); } +/* + * Cross CPU call to disable a performance counter + */ +static void __perf_counter_disable(void *info) +{ + struct perf_counter *counter = info; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter_context *ctx = counter->ctx; + unsigned long flags; + + /* + * If this is a per-task counter, need to check whether this + * counter's task is the current task on this cpu. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); + + /* + * If the counter is on, turn it off. + * If it is in error state, leave it in error state. + */ + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + if (counter == counter->group_leader) + group_sched_out(counter, cpuctx, ctx); + else + counter_sched_out(counter, cpuctx, ctx); + counter->state = PERF_COUNTER_STATE_OFF; + } + + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); +} + +/* + * Disable a counter. + */ +static void perf_counter_disable(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Disable the counter on the cpu that it's on + */ + smp_call_function_single(counter->cpu, __perf_counter_disable, + counter, 1); + return; + } + + retry: + task_oncpu_function_call(task, __perf_counter_disable, counter); + + spin_lock_irq(&ctx->lock); + /* + * If the counter is still active, we need to retry the cross-call. + */ + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + counter->state = PERF_COUNTER_STATE_OFF; + + spin_unlock_irq(&ctx->lock); +} + +/* + * Disable a counter and all its children. + */ +static void perf_counter_disable_family(struct perf_counter *counter) +{ + struct perf_counter *child; + + perf_counter_disable(counter); + + /* + * Lock the mutex to protect the list of children + */ + mutex_lock(&counter->mutex); + list_for_each_entry(child, &counter->child_list, child_list) + perf_counter_disable(child); + mutex_unlock(&counter->mutex); +} + static int counter_sched_in(struct perf_counter *counter, struct perf_cpu_context *cpuctx, @@ -302,6 +417,7 @@ static void __perf_install_in_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *leader = counter->group_leader; int cpu = smp_processor_id(); unsigned long flags; u64 perf_flags; @@ -327,23 +443,40 @@ static void __perf_install_in_context(void *info) list_add_counter(counter, ctx); ctx->nr_counters++; + /* + * Don't put the counter on if it is disabled or if + * it is in a group and the group isn't on. + */ + if (counter->state != PERF_COUNTER_STATE_INACTIVE || + (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) + goto unlock; + /* * An exclusive counter can't go on if there are already active * hardware counters, and no hardware counter can go on if there * is already an exclusive counter on. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE && - !group_can_go_on(counter, cpuctx, 1)) + if (!group_can_go_on(counter, cpuctx, 1)) err = -EEXIST; else err = counter_sched_in(counter, cpuctx, ctx, cpu); - if (err && counter->hw_event.pinned) - counter->state = PERF_COUNTER_STATE_ERROR; + if (err) { + /* + * This counter couldn't go on. If it is in a group + * then we have to pull the whole group off. + * If the counter group is pinned then put it in error state. + */ + if (leader != counter) + group_sched_out(leader, cpuctx, ctx); + if (leader->hw_event.pinned) + leader->state = PERF_COUNTER_STATE_ERROR; + } if (!err && !ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; + unlock: hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); @@ -359,6 +492,8 @@ static void __perf_install_in_context(void *info) * If the counter is attached to a task which is on a CPU we use a smp * call to enable it in the task context. The task might have been * scheduled away, but we check this in the smp call again. + * + * Must be called with ctx->mutex held. */ static void perf_install_in_context(struct perf_counter_context *ctx, @@ -387,7 +522,7 @@ retry: /* * we need to retry the smp call. */ - if (ctx->nr_active && list_empty(&counter->list_entry)) { + if (ctx->is_active && list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } @@ -404,26 +539,131 @@ retry: spin_unlock_irq(&ctx->lock); } -static void -group_sched_out(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) +/* + * Cross CPU call to enable a performance counter + */ +static void __perf_counter_enable(void *info) { - struct perf_counter *counter; + struct perf_counter *counter = info; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *leader = counter->group_leader; + unsigned long flags; + int err; - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + /* + * If this is a per-task counter, need to check whether this + * counter's task is the current task on this cpu. + */ + if (ctx->task && cpuctx->task_ctx != ctx) return; - counter_sched_out(group_counter, cpuctx, ctx); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); + + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + goto unlock; + counter->state = PERF_COUNTER_STATE_INACTIVE; /* - * Schedule out siblings (if any): + * If the counter is in a group and isn't the group leader, + * then don't put it on unless the group is on. */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) - counter_sched_out(counter, cpuctx, ctx); + if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) + goto unlock; - if (group_counter->hw_event.exclusive) - cpuctx->exclusive = 0; + if (!group_can_go_on(counter, cpuctx, 1)) + err = -EEXIST; + else + err = counter_sched_in(counter, cpuctx, ctx, + smp_processor_id()); + + if (err) { + /* + * If this counter can't go on and it's part of a + * group, then the whole group has to come off. + */ + if (leader != counter) + group_sched_out(leader, cpuctx, ctx); + if (leader->hw_event.pinned) + leader->state = PERF_COUNTER_STATE_ERROR; + } + + unlock: + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); +} + +/* + * Enable a counter. + */ +static void perf_counter_enable(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Enable the counter on the cpu that it's on + */ + smp_call_function_single(counter->cpu, __perf_counter_enable, + counter, 1); + return; + } + + spin_lock_irq(&ctx->lock); + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + goto out; + + /* + * If the counter is in error state, clear that first. + * That way, if we see the counter in error state below, we + * know that it has gone back into error state, as distinct + * from the task having been scheduled away before the + * cross-call arrived. + */ + if (counter->state == PERF_COUNTER_STATE_ERROR) + counter->state = PERF_COUNTER_STATE_OFF; + + retry: + spin_unlock_irq(&ctx->lock); + task_oncpu_function_call(task, __perf_counter_enable, counter); + + spin_lock_irq(&ctx->lock); + + /* + * If the context is active and the counter is still off, + * we need to retry the cross-call. + */ + if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) + goto retry; + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (counter->state == PERF_COUNTER_STATE_OFF) + counter->state = PERF_COUNTER_STATE_INACTIVE; + out: + spin_unlock_irq(&ctx->lock); +} + +/* + * Enable a counter and all its children. + */ +static void perf_counter_enable_family(struct perf_counter *counter) +{ + struct perf_counter *child; + + perf_counter_enable(counter); + + /* + * Lock the mutex to protect the list of children + */ + mutex_lock(&counter->mutex); + list_for_each_entry(child, &counter->child_list, child_list) + perf_counter_enable(child); + mutex_unlock(&counter->mutex); } void __perf_counter_sched_out(struct perf_counter_context *ctx, @@ -432,16 +672,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_counter *counter; u64 flags; + spin_lock(&ctx->lock); + ctx->is_active = 0; if (likely(!ctx->nr_counters)) - return; + goto out; - spin_lock(&ctx->lock); flags = hw_perf_save_disable(); if (ctx->nr_active) { list_for_each_entry(counter, &ctx->counter_list, list_entry) group_sched_out(counter, cpuctx, ctx); } hw_perf_restore(flags); + out: spin_unlock(&ctx->lock); } @@ -528,10 +770,11 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, u64 flags; int can_add_hw = 1; + spin_lock(&ctx->lock); + ctx->is_active = 1; if (likely(!ctx->nr_counters)) - return; + goto out; - spin_lock(&ctx->lock); flags = hw_perf_save_disable(); /* @@ -578,6 +821,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, } } hw_perf_restore(flags); + out: spin_unlock(&ctx->lock); } @@ -896,12 +1140,14 @@ static int perf_release(struct inode *inode, struct file *file) file->private_data = NULL; + mutex_lock(&ctx->mutex); mutex_lock(&counter->mutex); perf_counter_remove_from_context(counter); put_context(ctx); mutex_unlock(&counter->mutex); + mutex_unlock(&ctx->mutex); kfree(counter); @@ -1053,10 +1299,30 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) return events; } +static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct perf_counter *counter = file->private_data; + int err = 0; + + switch (cmd) { + case PERF_COUNTER_IOC_ENABLE: + perf_counter_enable_family(counter); + break; + case PERF_COUNTER_IOC_DISABLE: + perf_counter_disable_family(counter); + break; + default: + err = -ENOTTY; + } + return err; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, .poll = perf_poll, + .unlocked_ioctl = perf_ioctl, + .compat_ioctl = perf_ioctl, }; static int cpu_clock_perf_counter_enable(struct perf_counter *counter) @@ -1348,6 +1614,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); + INIT_LIST_HEAD(&counter->child_list); + counter->irqdata = &counter->data[0]; counter->usrdata = &counter->data[1]; counter->cpu = cpu; @@ -1452,7 +1720,9 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, goto err_free_put_context; counter->filp = counter_file; + mutex_lock(&ctx->mutex); perf_install_in_context(ctx, counter, cpu); + mutex_unlock(&ctx->mutex); fput_light(counter_file, fput_needed2); @@ -1479,6 +1749,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, { memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->lock); + mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); ctx->task = task; } @@ -1486,20 +1757,30 @@ __perf_counter_init_context(struct perf_counter_context *ctx, /* * inherit a counter from parent task to child task: */ -static int +static struct perf_counter * inherit_counter(struct perf_counter *parent_counter, struct task_struct *parent, struct perf_counter_context *parent_ctx, struct task_struct *child, + struct perf_counter *group_leader, struct perf_counter_context *child_ctx) { struct perf_counter *child_counter; + /* + * Instead of creating recursive hierarchies of counters, + * we link inherited counters back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + if (parent_counter->parent) + parent_counter = parent_counter->parent; + child_counter = perf_counter_alloc(&parent_counter->hw_event, - parent_counter->cpu, NULL, - GFP_ATOMIC); + parent_counter->cpu, group_leader, + GFP_KERNEL); if (!child_counter) - return -ENOMEM; + return NULL; /* * Link it up in the child's context: @@ -1523,16 +1804,82 @@ inherit_counter(struct perf_counter *parent_counter, */ atomic_long_inc(&parent_counter->filp->f_count); + /* + * Link this into the parent counter's child list + */ + mutex_lock(&parent_counter->mutex); + list_add_tail(&child_counter->child_list, &parent_counter->child_list); + + /* + * Make the child state follow the state of the parent counter, + * not its hw_event.disabled bit. We hold the parent's mutex, + * so we won't race with perf_counter_{en,dis}able_family. + */ + if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + else + child_counter->state = PERF_COUNTER_STATE_OFF; + + mutex_unlock(&parent_counter->mutex); + + return child_counter; +} + +static int inherit_group(struct perf_counter *parent_counter, + struct task_struct *parent, + struct perf_counter_context *parent_ctx, + struct task_struct *child, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *leader; + struct perf_counter *sub; + + leader = inherit_counter(parent_counter, parent, parent_ctx, + child, NULL, child_ctx); + if (!leader) + return -ENOMEM; + list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { + if (!inherit_counter(sub, parent, parent_ctx, + child, leader, child_ctx)) + return -ENOMEM; + } return 0; } +static void sync_child_counter(struct perf_counter *child_counter, + struct perf_counter *parent_counter) +{ + u64 parent_val, child_val; + + parent_val = atomic64_read(&parent_counter->count); + child_val = atomic64_read(&child_counter->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_counter->count); + + /* + * Remove this counter from the parent's list + */ + mutex_lock(&parent_counter->mutex); + list_del_init(&child_counter->child_list); + mutex_unlock(&parent_counter->mutex); + + /* + * Release the parent counter, if this was the last + * reference to it. + */ + fput(parent_counter->filp); +} + static void __perf_counter_exit_task(struct task_struct *child, struct perf_counter *child_counter, struct perf_counter_context *child_ctx) { struct perf_counter *parent_counter; - u64 parent_val, child_val; + struct perf_counter *sub, *tmp; /* * If we do not self-reap then we have to wait for the @@ -1561,7 +1908,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - counter_sched_out(child_counter, cpuctx, child_ctx); + group_sched_out(child_counter, cpuctx, child_ctx); list_del_init(&child_counter->list_entry); @@ -1577,26 +1924,23 @@ __perf_counter_exit_task(struct task_struct *child, * that are still around due to the child reference. These * counters need to be zapped - but otherwise linger. */ - if (!parent_counter) - return; - - parent_val = atomic64_read(&parent_counter->count); - child_val = atomic64_read(&child_counter->count); - - /* - * Add back the child's count to the parent's count: - */ - atomic64_add(child_val, &parent_counter->count); - - fput(parent_counter->filp); + if (parent_counter) { + sync_child_counter(child_counter, parent_counter); + list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list, + list_entry) { + if (sub->parent) + sync_child_counter(sub, sub->parent); + kfree(sub); + } + } kfree(child_counter); } /* - * When a child task exist, feed back counter values to parent counters. + * When a child task exits, feed back counter values to parent counters. * - * Note: we are running in child context, but the PID is not hashed + * Note: we may be running in child context, but the PID is not hashed * anymore so new counters will not be added. */ void perf_counter_exit_task(struct task_struct *child) @@ -1620,9 +1964,8 @@ void perf_counter_exit_task(struct task_struct *child) void perf_counter_init_task(struct task_struct *child) { struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter *counter, *parent_counter; + struct perf_counter *counter; struct task_struct *parent = current; - unsigned long flags; child_ctx = &child->perf_counter_ctx; parent_ctx = &parent->perf_counter_ctx; @@ -1641,32 +1984,22 @@ void perf_counter_init_task(struct task_struct *child) * Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. */ - spin_lock_irqsave(&parent_ctx->lock, flags); + mutex_lock(&parent_ctx->mutex); /* * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { - if (!counter->hw_event.inherit || counter->group_leader != counter) + if (!counter->hw_event.inherit) continue; - /* - * Instead of creating recursive hierarchies of counters, - * we link inheritd counters back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - parent_counter = counter; - if (counter->parent) - parent_counter = counter->parent; - - if (inherit_counter(parent_counter, parent, + if (inherit_group(counter, parent, parent_ctx, child, child_ctx)) break; } - spin_unlock_irqrestore(&parent_ctx->lock, flags); + mutex_unlock(&parent_ctx->mutex); } static void __cpuinit perf_counter_init_cpu(int cpu) @@ -1692,11 +2025,15 @@ static void __perf_counter_exit_cpu(void *info) list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) __perf_counter_remove_from_context(counter); - } static void perf_counter_exit_cpu(int cpu) { + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &cpuctx->ctx; + + mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); + mutex_unlock(&ctx->mutex); } #else static inline void perf_counter_exit_cpu(int cpu) { } -- cgit v1.2.3-70-g09d2 From 1b023a96d9b44f50f4d8ff28c15f5b80e354760f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 10:13:01 +0100 Subject: perfcounters: throttle on too high IRQ rates Starting kerneltop with only -c 100 seems to be a bad idea, it can easily lock the system due to perfcounter IRQ overload. So add throttling: if a new IRQ arrives in a shorter than PERFMON_MIN_PERIOD_NS time, turn off perfcounters and untrottle them from the next timer tick. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 ++ arch/x86/kernel/cpu/perf_counter.c | 38 ++++++++++++++++++++++++++++++++------ include/linux/perf_counter.h | 4 ++++ 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7b434e5b14c..849c23009bf 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -781,6 +781,8 @@ static void local_apic_timer_interrupt(void) inc_irq_stat(apic_timer_irqs); evt->event_handler(evt); + + perf_counter_unthrottle(); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9376771f757..1a040b179b5 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -33,6 +33,9 @@ static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 last_interrupt; + u64 global_enable; + int throttled; }; /* @@ -474,16 +477,19 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); - u64 ack, status, saved_global; - struct cpu_hw_counters *cpuc; + u64 ack, status, now; + struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); /* Disable counters globally */ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + now = sched_clock(); + if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS) + cpuc->throttled = 1; + cpuc->last_interrupt = now; rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); if (!status) @@ -533,9 +539,29 @@ again: goto again; out: /* - * Restore - do not reenable when global enable is off: + * Restore - do not reenable when global enable is off or throttled: */ - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); + if (!cpuc->throttled) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); +} + +void perf_counter_unthrottle(void) +{ + struct cpu_hw_counters *cpuc; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return; + + if (unlikely(!perf_counters_initialized)) + return; + + cpuc = &per_cpu(cpu_hw_counters, smp_processor_id()); + if (cpuc->throttled) { + if (printk_ratelimit()) + printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n"); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + cpuc->throttled = 0; + } } void smp_perf_counter_interrupt(struct pt_regs *regs) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 33ba9fe0a78..91f1ca4c01c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); @@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +#define PERFMON_MIN_PERIOD_NS 10000 + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 4b39fd96855254a244f71245b41a91cdecb87d63 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 14:36:16 +0100 Subject: perfcounters: ratelimit performance counter interrupts Ratelimit performance counter interrupts to 100KHz per CPU. This replaces the irq-delta-time based method. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 27 +++++++++++++++------------ include/linux/perf_counter.h | 2 -- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1a040b179b5..a56d4cf92f3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -33,9 +33,8 @@ static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - u64 last_interrupt; + unsigned long interrupts; u64 global_enable; - int throttled; }; /* @@ -470,6 +469,11 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) } } +/* + * Maximum interrupt frequency of 100KHz per CPU + */ +#define PERFMON_MAX_INTERRUPTS 100000/HZ + /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: @@ -477,7 +481,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); - u64 ack, status, now; + u64 ack, status; struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); @@ -486,11 +490,6 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); - now = sched_clock(); - if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS) - cpuc->throttled = 1; - cpuc->last_interrupt = now; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); if (!status) goto out; @@ -541,13 +540,14 @@ out: /* * Restore - do not reenable when global enable is off or throttled: */ - if (!cpuc->throttled) + if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); } void perf_counter_unthrottle(void) { struct cpu_hw_counters *cpuc; + u64 global_enable; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; @@ -556,12 +556,15 @@ void perf_counter_unthrottle(void) return; cpuc = &per_cpu(cpu_hw_counters, smp_processor_id()); - if (cpuc->throttled) { + if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { if (printk_ratelimit()) - printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n"); + printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n"); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); - cpuc->throttled = 0; } + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable); + if (unlikely(cpuc->global_enable && !global_enable)) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + cpuc->interrupts = 0; } void smp_perf_counter_interrupt(struct pt_regs *regs) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 91f1ca4c01c..f55381fbcac 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -271,8 +271,6 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } -#define PERFMON_MIN_PERIOD_NS 10000 - #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v1.2.3-70-g09d2 From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 9 Feb 2009 22:42:47 +1100 Subject: perf_counters: make software counters work as per-cpu counters Impact: kernel crash fix Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software counter as a per-cpu counter would reliably crash the system, because it calls __task_delta_exec with a null pointer. The page fault, context switch and cpu migration counters also won't function correctly as per-cpu counters since they reference the current task. This fixes the problem by redirecting the task_clock counter to the cpu_clock counter when used as a per-cpu counter, and by implementing per-cpu page fault, context switch and cpu migration counters. Along the way, this: - Initializes counter->ctx earlier, in perf_counter_alloc, so that sw_perf_counter_init can use it - Adds code to kernel/sched.c to count task migrations into each cpu, in rq->nr_migrations_in - Exports the per-cpu context switch and task migration counts via new functions added to kernel/sched.c - Makes sure that if sw_perf_counter_init fails, we don't try to initialize the counter as a hardware counter. Since the user has passed a negative, non-raw event type, they clearly don't intend for it to be interpreted as a hardware event. Reported-by: "Zhang Yanmin" Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/perf_counter.c | 78 +++++++++++++++++++++++++++++---------------------- kernel/sched.c | 17 +++++++++++ 3 files changed, 64 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b85b10abf77..1e5f70062a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -137,6 +137,8 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern u64 cpu_nr_switches(int cpu); +extern u64 cpu_nr_migrations(int cpu); struct seq_file; struct cfs_rq; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f27a7e9f3c4..544193cbc47 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include /* * Each CPU has a list of per CPU counters: @@ -502,7 +504,6 @@ perf_install_in_context(struct perf_counter_context *ctx, { struct task_struct *task = ctx->task; - counter->ctx = ctx; if (!task) { /* * Per cpu counters are installed via an smp call and @@ -1417,11 +1418,19 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; -static u64 get_page_faults(void) +#ifdef CONFIG_VM_EVENT_COUNTERS +#define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT] +#else +#define cpu_page_faults() 0 +#endif + +static u64 get_page_faults(struct perf_counter *counter) { - struct task_struct *curr = current; + struct task_struct *curr = counter->ctx->task; - return curr->maj_flt + curr->min_flt; + if (curr) + return curr->maj_flt + curr->min_flt; + return cpu_page_faults(); } static void page_faults_perf_counter_update(struct perf_counter *counter) @@ -1430,7 +1439,7 @@ static void page_faults_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_page_faults(); + now = get_page_faults(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1446,11 +1455,7 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) static int page_faults_perf_counter_enable(struct perf_counter *counter) { - /* - * page-faults is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); return 0; } @@ -1465,11 +1470,13 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = { .read = page_faults_perf_counter_read, }; -static u64 get_context_switches(void) +static u64 get_context_switches(struct perf_counter *counter) { - struct task_struct *curr = current; + struct task_struct *curr = counter->ctx->task; - return curr->nvcsw + curr->nivcsw; + if (curr) + return curr->nvcsw + curr->nivcsw; + return cpu_nr_switches(smp_processor_id()); } static void context_switches_perf_counter_update(struct perf_counter *counter) @@ -1478,7 +1485,7 @@ static void context_switches_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_context_switches(); + now = get_context_switches(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1494,11 +1501,7 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) static int context_switches_perf_counter_enable(struct perf_counter *counter) { - /* - * ->nvcsw + curr->nivcsw is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_context_switches(counter)); return 0; } @@ -1513,9 +1516,13 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .read = context_switches_perf_counter_read, }; -static inline u64 get_cpu_migrations(void) +static inline u64 get_cpu_migrations(struct perf_counter *counter) { - return current->se.nr_migrations; + struct task_struct *curr = counter->ctx->task; + + if (curr) + return curr->se.nr_migrations; + return cpu_nr_migrations(smp_processor_id()); } static void cpu_migrations_perf_counter_update(struct perf_counter *counter) @@ -1524,7 +1531,7 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_cpu_migrations(); + now = get_cpu_migrations(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1540,11 +1547,7 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { - /* - * se.nr_migrations is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter)); return 0; } @@ -1569,7 +1572,14 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_TASK_CLOCK: - hw_ops = &perf_ops_task_clock; + /* + * If the user instantiates this as a per-cpu counter, + * use the cpu_clock counter instead. + */ + if (counter->ctx->task) + hw_ops = &perf_ops_task_clock; + else + hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: hw_ops = &perf_ops_page_faults; @@ -1592,6 +1602,7 @@ sw_perf_counter_init(struct perf_counter *counter) static struct perf_counter * perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, + struct perf_counter_context *ctx, struct perf_counter *group_leader, gfp_t gfpflags) { @@ -1623,6 +1634,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; + counter->ctx = ctx; counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) @@ -1631,7 +1643,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; if (!hw_event->raw && hw_event->type < 0) hw_ops = sw_perf_counter_init(counter); - if (!hw_ops) + else hw_ops = hw_perf_counter_init(counter); if (!hw_ops) { @@ -1707,7 +1719,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, } ret = -EINVAL; - counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL); + counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, + GFP_KERNEL); if (!counter) goto err_put_context; @@ -1777,15 +1790,14 @@ inherit_counter(struct perf_counter *parent_counter, parent_counter = parent_counter->parent; child_counter = perf_counter_alloc(&parent_counter->hw_event, - parent_counter->cpu, group_leader, - GFP_KERNEL); + parent_counter->cpu, child_ctx, + group_leader, GFP_KERNEL); if (!child_counter) return NULL; /* * Link it up in the child's context: */ - child_counter->ctx = child_ctx; child_counter->task = child; list_add_counter(child_counter, child_ctx); child_ctx->nr_counters++; diff --git a/kernel/sched.c b/kernel/sched.c index 8db1a4cf208..173768f142a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -558,6 +558,7 @@ struct rq { struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; + u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -1908,6 +1909,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #endif if (old_cpu != new_cpu) { p->se.nr_migrations++; + new_rq->nr_migrations_in++; #ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); @@ -2810,6 +2812,21 @@ unsigned long nr_active(void) return running + uninterruptible; } +/* + * Externally visible per-cpu scheduler statistics: + * cpu_nr_switches(cpu) - number of context switches on that cpu + * cpu_nr_migrations(cpu) - number of migrations into that cpu + */ +u64 cpu_nr_switches(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + +u64 cpu_nr_migrations(int cpu) +{ + return cpu_rq(cpu)->nr_migrations_in; +} + /* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). -- cgit v1.2.3-70-g09d2 From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Feb 2009 14:35:35 +1100 Subject: perf_counters: allow users to count user, kernel and/or hypervisor events Impact: new perf_counter feature This extends the perf_counter_hw_event struct with bits that specify that events in user, kernel and/or hypervisor mode should not be counted (i.e. should be excluded), and adds code to program the PMU mode selection bits accordingly on x86 and powerpc. For software counters, we don't currently have the infrastructure to distinguish which mode an event occurs in, so we currently fail the counter initialization if the setting of the hw_event.exclude_* bits would require us to distinguish. Context switches and CPU migrations are currently considered to occur in kernel mode. On x86, this changes the previous policy that only root can count kernel events. Now non-root users can count kernel events or exclude them. Non-root users still can't use NMI events, though. On x86 we don't appear to have any way to control whether hypervisor events are counted or not, so hw_event.exclude_hv is ignored. On powerpc, the selection of whether to count events in user, kernel and/or hypervisor mode is PMU-wide, not per-counter, so this adds a check that the hw_event.exclude_* settings are the same as other events on the PMU. Counters being added to a group have to have the same settings as the other hardware counters in the group. Counters and groups can only be enabled in hw_perf_group_sched_in or power_perf_enable if they have the same settings as any other counters already on the PMU. If we are not running on a hypervisor, the exclude_hv setting is ignored (by forcing it to 0) since we can't ever get any hypervisor events. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 68 ++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/perf_counter.c | 31 ++++++++++------- include/linux/perf_counter.h | 19 ++++++----- kernel/perf_counter.c | 26 ++++++++++++--- 4 files changed, 117 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5b0211348c7..bd6ba85beb5 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -16,6 +16,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev) return 0; } +/* + * Check if newly-added counters have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added counters. + */ +static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) +{ + int eu, ek, eh; + int i, n; + struct perf_counter *counter; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + eu = ctrs[0]->hw_event.exclude_user; + ek = ctrs[0]->hw_event.exclude_kernel; + eh = ctrs[0]->hw_event.exclude_hv; + if (n_prev == 0) + n_prev = 1; + for (i = n_prev; i < n; ++i) { + counter = ctrs[i]; + if (counter->hw_event.exclude_user != eu || + counter->hw_event.exclude_kernel != ek || + counter->hw_event.exclude_hv != eh) + return -EAGAIN; + } + return 0; +} + static void power_perf_read(struct perf_counter *counter) { long val, delta, prev; @@ -323,6 +354,20 @@ void hw_perf_restore(u64 disable) goto out; } + /* + * Add in MMCR0 freeze bits corresponding to the + * hw_event.exclude_* bits for the first counter. + * We have already checked that all counters have the + * same values for these bits as the first counter. + */ + counter = cpuhw->counter[0]; + if (counter->hw_event.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (counter->hw_event.exclude_kernel) + cpuhw->mmcr[0] |= MMCR0_FCS; + if (counter->hw_event.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + /* * Write the new configuration to MMCR* with the freeze * bit set and set the hardware counters to their initial values. @@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, &cpuhw->counter[n0], &cpuhw->events[n0]); if (n < 0) return -EAGAIN; + if (check_excludes(cpuhw->counter, n0, n)) + return -EAGAIN; if (power_check_constraints(cpuhw->events, n + n0)) return -EAGAIN; cpuhw->n_counters = n0 + n; @@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter) goto out; cpuhw->counter[n0] = counter; cpuhw->events[n0] = counter->hw.config; + if (check_excludes(cpuhw->counter, n0, 1)) + goto out; if (power_check_constraints(cpuhw->events, n0 + 1)) goto out; @@ -554,6 +603,17 @@ hw_perf_counter_init(struct perf_counter *counter) counter->hw.config_base = ev; counter->hw.idx = 0; + /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. This also means that we don't + * set the MMCR0_FCHV bit, which unconditionally freezes + * the counters on the PPC970 variants used in Apple G5 + * machines (since MSR.HV is always 1 on those machines). + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + counter->hw_event.exclude_hv = 0; + /* * If this is in a group, check if it can go on with all the * other hardware counters in the group. We assume the counter @@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter) if (n < 0) return NULL; } - events[n++] = ev; - if (power_check_constraints(events, n)) + events[n] = ev; + if (check_excludes(ctrs, n, 1)) + return NULL; + if (power_check_constraints(events, n + 1)) return NULL; - counter->hw.config = events[n - 1]; + counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); return &power_perf_ops; } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9901e46998d..383d4c6423a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return -EINVAL; /* - * Count user events, and generate PMC IRQs: + * Generate PMC IRQs: * (keep 'enabled' bit clear for now) */ - hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT; + hwc->config = ARCH_PERFMON_EVENTSEL_INT; /* - * If privileged enough, count OS events too, and allow - * NMI events as well: + * Count user and OS events unless requested not to. */ - hwc->nmi = 0; - if (capable(CAP_SYS_ADMIN)) { + if (!hw_event->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!hw_event->exclude_kernel) hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - if (hw_event->nmi) - hwc->nmi = 1; - } + + /* + * If privileged enough, allow NMI events: + */ + hwc->nmi = 0; + if (capable(CAP_SYS_ADMIN) && hw_event->nmi) + hwc->nmi = 1; hwc->irq_period = hw_event->irq_period; /* @@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter, int err; /* - * Enable IRQ generation (0x8) and ring-3 counting (0x2), - * and enable ring-0 counting if allowed: + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: */ - bits = 0x8ULL | 0x2ULL; + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; bits <<= (idx * 4); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f55381fbcac..c83f51d6e35 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,14 +83,17 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only counter on PMU */ - - __reserved_1 : 26; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + + __reserved_1 : 23; u64 __reserved_2; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 544193cbc47..89d5e3fe970 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter) { const struct hw_perf_counter_ops *hw_ops = NULL; + /* + * Software counters (currently) can't in general distinguish + * between user, kernel and hypervisor events. + * However, context switches and cpu migrations are considered + * to be kernel events, and page faults are never hypervisor + * events. + */ switch (counter->hw_event.type) { case PERF_COUNT_CPU_CLOCK: - hw_ops = &perf_ops_cpu_clock; + if (!(counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel || + counter->hw_event.exclude_hv)) + hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_TASK_CLOCK: + if (counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel || + counter->hw_event.exclude_hv) + break; /* * If the user instantiates this as a per-cpu counter, * use the cpu_clock counter instead. @@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: - hw_ops = &perf_ops_page_faults; + if (!(counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel)) + hw_ops = &perf_ops_page_faults; break; case PERF_COUNT_CONTEXT_SWITCHES: - hw_ops = &perf_ops_context_switches; + if (!counter->hw_event.exclude_kernel) + hw_ops = &perf_ops_context_switches; break; case PERF_COUNT_CPU_MIGRATIONS: - hw_ops = &perf_ops_cpu_migrations; + if (!counter->hw_event.exclude_kernel) + hw_ops = &perf_ops_cpu_migrations; break; default: break; -- cgit v1.2.3-70-g09d2 From c07c99b67233ccaad38a961c17405dc1e1542aa4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 13 Feb 2009 22:10:34 +1100 Subject: perfcounters: make context switch and migration software counters work again Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the context switch and migration software counters to report zero always. With that commit, the software counters only count events that occur between sched-in and sched-out for a task. This is necessary for the counter enable/disable prctls and ioctls to work. However, the context switch and migration counts are incremented after sched-out for one task and before sched-in for the next. Since the increment doesn't occur while a task is scheduled in (as far as the software counters are concerned) it doesn't count towards any counter. Thus the context switch and migration counters need to count events that occur at any time, provided the counter is enabled, not just those that occur while the task is scheduled in (from the perf_counter subsystem's point of view). The problem though is that the software counter code can't tell the difference between being enabled and being scheduled in, and between being disabled and being scheduled out, since we use the one pair of enable/disable entry points for both. That is, the high-level disable operation simply arranges for the counter to not be scheduled in any more, and the high-level enable operation arranges for it to be scheduled in again. One way to solve this would be to have sched_in/out operations in the hw_perf_counter_ops struct as well as enable/disable. However, this takes a simpler approach: it adds a 'prev_state' field to the perf_counter struct that allows a counter's enable method to know whether the counter was previously disabled or just inactive (scheduled out), and therefore whether the enable method is being called as a result of a high-level enable or a schedule-in operation. This then allows the context switch, migration and page fault counters to reset their hw.prev_count value in their enable functions only if they are called as a result of a high-level enable operation. Although page faults would normally only occur while the counter is scheduled in, this changes the page fault counter code too in case there are ever circumstances where page faults get counted against a task while its counters are not scheduled in. Reported-by: Jaswinder Singh Rajput Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c83f51d6e35..32cd1acb738 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -173,6 +173,7 @@ struct perf_counter { const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; + enum perf_counter_active_state prev_state; atomic64_t count; struct perf_counter_hw_event hw_event; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fcefb0a726f..ad62965828d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info) list_add_counter(counter, ctx); ctx->nr_counters++; + counter->prev_state = PERF_COUNTER_STATE_OFF; /* * Don't put the counter on if it is disabled or if @@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; @@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter, if (ret) return ret < 0 ? ret : 0; + group_counter->prev_state = group_counter->state; if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) return -EAGAIN; @@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter, * Schedule in siblings as one group (if any): */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { + counter->prev_state = counter->state; if (counter_sched_in(counter, cpuctx, ctx, cpu)) { partial_group = counter; goto group_error; @@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) static int task_clock_perf_counter_enable(struct perf_counter *counter) { - u64 now = task_clock_perf_counter_val(counter, 0); - - atomic64_set(&counter->hw.prev_count, now); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + task_clock_perf_counter_val(counter, 0)); return 0; } @@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) static int page_faults_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); return 0; } @@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) static int context_switches_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_context_switches(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + get_context_switches(counter)); return 0; } @@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + get_cpu_migrations(counter)); return 0; } -- cgit v1.2.3-70-g09d2 From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Feb 2009 22:43:46 +1100 Subject: perfcounters: fix a few minor cleanliness issues This fixes three issues noticed by Arnd Bergmann: - Add #ifdef __KERNEL__ and move some things around in perf_counter.h to make sure only the bits that userspace needs are exported to userspace. - Use __u64, __s64, __u32 types in the structs exported to userspace rather than u64, s64, u32. - Make the sys_perf_counter_open syscall available to the SPUs on Cell platforms. And one issue that I noticed in looking at the code again: - Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get the proper handling of int arguments on ppc64 (and some other 64-bit architectures). Reported-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/systbl.h | 2 +- include/linux/perf_counter.h | 43 +++++++++++++++++++++------------------ include/linux/syscalls.h | 9 +++----- kernel/perf_counter.c | 6 +++--- 4 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4c8095f6bec..d312eec8abb 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,4 +322,4 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL(perf_counter_open) +SYSCALL_SPU(perf_counter_open) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 32cd1acb738..186efaf4966 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -13,20 +13,8 @@ #ifndef _LINUX_PERF_COUNTER_H #define _LINUX_PERF_COUNTER_H -#include -#include - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include - -struct task_struct; +#include +#include /* * User-space ABI bits: @@ -78,12 +66,12 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - s64 type; + __s64 type; - u64 irq_period; - u32 record_type; + __u64 irq_period; + __u32 record_type; - u32 disabled : 1, /* off by default */ + __u32 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -95,7 +83,7 @@ struct perf_counter_hw_event { __reserved_1 : 23; - u64 __reserved_2; + __u64 __reserved_2; }; /* @@ -104,10 +92,24 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#ifdef __KERNEL__ /* - * Kernel-internal data types: + * Kernel-internal data types and definitions: */ +#ifdef CONFIG_PERF_COUNTERS +# include +#endif + +#include +#include +#include +#include +#include +#include + +struct task_struct; + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -293,4 +295,5 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif +#endif /* __KERNEL__ */ #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88255d3261a..28ef2be839c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -696,10 +696,7 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd); +asmlinkage long sys_perf_counter_open( + const struct perf_counter_hw_event __user *hw_event_uptr, + pid_t pid, int cpu, int group_fd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ad62965828d..16b14ba99d3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1690,9 +1690,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, * @cpu: target cpu * @group_fd: group leader counter fd */ -asmlinkage int -sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, int cpu, int group_fd) +SYSCALL_DEFINE4(perf_counter_open, + const struct perf_counter_hw_event __user *, hw_event_uptr, + pid_t, pid, int, cpu, int, group_fd) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; -- cgit v1.2.3-70-g09d2 From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 4 Mar 2009 20:36:51 +1100 Subject: perfcounters: provide expansion room in the ABI Impact: ABI change This expands several fields in the perf_counter_hw_event struct and adds a "flags" argument to the perf_counter_open system call, in order that features can be added in future without ABI changes. In particular the record_type field is expanded to 64 bits, and the space for flag bits has been expanded from 32 to 64 bits. This also adds some new fields: * read_format (64 bits) is intended to provide a way to specify what userspace wants to get back when it does a read() on a simple (non-interrupting) counter; * exclude_idle (1 bit) provides a way for userspace to ask that events that occur when the cpu is idle be excluded; * extra_config_len will provide a way for userspace to supply an arbitrary amount of extra machine-specific PMU configuration data immediately following the perf_counter_hw_event struct, to allow sophisticated users to program things such as instruction matching CAMs and address range registers; * __reserved_3 and __reserved_4 provide space for future expansion. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 12 +++++++++--- include/linux/syscalls.h | 2 +- kernel/perf_counter.c | 10 +++++++--- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 186efaf4966..c42455ab155 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -69,9 +69,10 @@ struct perf_counter_hw_event { __s64 type; __u64 irq_period; - __u32 record_type; + __u64 record_type; + __u64 read_format; - __u32 disabled : 1, /* off by default */ + __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -80,10 +81,15 @@ struct perf_counter_hw_event { exclude_user : 1, /* don't count user */ exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 23; + __reserved_1 : 55; + + __u32 extra_config_len; + __u32 __reserved_4; __u64 __reserved_2; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 28ef2be839c..ab1d7724739 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( const struct perf_counter_hw_event __user *hw_event_uptr, - pid_t pid, int cpu, int group_fd); + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 16b14ba99d3..b2e838959f3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1683,16 +1683,16 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, } /** - * sys_perf_task_open - open a performance counter, associate it to a task/cpu + * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * * @hw_event_uptr: event type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu * @group_fd: group leader counter fd */ -SYSCALL_DEFINE4(perf_counter_open, +SYSCALL_DEFINE5(perf_counter_open, const struct perf_counter_hw_event __user *, hw_event_uptr, - pid_t, pid, int, cpu, int, group_fd) + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; @@ -1703,6 +1703,10 @@ SYSCALL_DEFINE4(perf_counter_open, int fput_needed2 = 0; int ret; + /* for future expandability... */ + if (flags) + return -EINVAL; + if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 2485e5184452ccbda34ff83883883d9107800dff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 12:33:16 +0100 Subject: perfcounters: fix reserved bits sizing The sum of bits is 65 currently not 64 - so reduce the # of reserved bits from 55 to 54. Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c42455ab155..dde564517b6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,7 +83,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; -- cgit v1.2.3-70-g09d2 From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Mar 2009 12:50:56 -0400 Subject: tracing: add function profiler Impact: new profiling feature This patch adds a function profiler. In debugfs/tracing/ two new files are created. function_profile_enabled - to enable or disable profiling trace_stat/functions - the profiled functions. For example: echo 1 > /debugfs/tracing/function_profile_enabled ./hackbench 50 echo 0 > /debugfs/tracing/function_profile_enabled yields: cat /debugfs/tracing/trace_stat/functions Function Hit -------- --- _spin_lock 10106442 _spin_unlock 10097492 kfree 6013704 _spin_unlock_irqrestore 4423941 _spin_lock_irqsave 4406825 __phys_addr 4181686 __slab_free 4038222 dput 4030130 path_put 4023387 unroll_tree_refs 4019532 [...] The most hit functions are listed first. Functions that are not hit are not listed. This feature depends on and uses dynamic function tracing. When the function profiling is disabled, no overhead occurs. But it still takes up around 300KB to hold the data, thus it is not recomended to keep it enabled for systems low on memory. When a '1' is echoed into the function_profile_enabled file, the counters for is function is reset back to zero. Thus you can see what functions are hit most by different programs. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 + kernel/trace/Kconfig | 19 +++ kernel/trace/ftrace.c | 313 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 334 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf7..0456c3a51c6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,6 +153,10 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; +#ifdef CONFIG_FUNCTION_PROFILER + unsigned long counter; + struct hlist_node node; +#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8a4d7293104..95e9ad5735d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -105,6 +105,7 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n @@ -376,6 +377,24 @@ config DYNAMIC_FTRACE were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. +config FUNCTION_PROFILER + bool "Kernel function profiler" + depends on DYNAMIC_FTRACE + default n + help + This option enables the kernel function profiler. When the dynamic + function tracing is enabled, a counter is added into the function + records used by the dynamic function tracer. A file is created in + debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A file in the trace_stats + directory called functions, that show the list of functions that + have been hit and their counters. + + This takes up around 320K more memory. + + If in doubt, say N + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7b8722baf15..11f364c776d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,6 +34,7 @@ #include #include "trace.h" +#include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ do { \ @@ -261,7 +262,6 @@ struct ftrace_func_probe { struct rcu_head rcu; }; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -309,6 +309,307 @@ static struct dyn_ftrace *ftrace_free_records; } \ } +#ifdef CONFIG_FUNCTION_PROFILER +static struct hlist_head *ftrace_profile_hash; +static int ftrace_profile_bits; +static int ftrace_profile_enabled; +static DEFINE_MUTEX(ftrace_profile_lock); + +static void * +function_stat_next(void *v, int idx) +{ + struct dyn_ftrace *rec = v; + struct ftrace_page *pg; + + pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + + again: + rec++; + if ((void *)rec >= (void *)&pg->records[pg->index]) { + pg = pg->next; + if (!pg) + return NULL; + rec = &pg->records[0]; + } + + if (rec->flags & FTRACE_FL_FREE || + rec->flags & FTRACE_FL_FAILED || + !(rec->flags & FTRACE_FL_CONVERTED) || + /* ignore non hit functions */ + !rec->counter) + goto again; + + return rec; +} + +static void *function_stat_start(struct tracer_stat *trace) +{ + return function_stat_next(&ftrace_pages_start->records[0], 0); +} + +static int function_stat_cmp(void *p1, void *p2) +{ + struct dyn_ftrace *a = p1; + struct dyn_ftrace *b = p2; + + if (a->counter < b->counter) + return -1; + if (a->counter > b->counter) + return 1; + else + return 0; +} + +static int function_stat_headers(struct seq_file *m) +{ + seq_printf(m, " Function Hit\n" + " -------- ---\n"); + return 0; +} + +static int function_stat_show(struct seq_file *m, void *v) +{ + struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + + seq_printf(m, " %-30.30s %10lu\n", str, rec->counter); + return 0; +} + +static struct tracer_stat function_stats = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + +static void ftrace_profile_init(int nr_funcs) +{ + unsigned long addr; + int order; + int size; + + /* + * We are profiling all functions, lets make it 1/4th of the + * number of functions that are in core kernel. So we have to + * iterate 4 times. + */ + order = (sizeof(struct hlist_head) * nr_funcs) / 4; + order = get_order(order); + size = 1 << (PAGE_SHIFT + order); + + pr_info("Allocating %d KB for profiler hash\n", size >> 10); + + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!addr) { + pr_warning("Could not allocate function profiler hash\n"); + return; + } + + ftrace_profile_hash = (void *)addr; + + /* + * struct hlist_head should be a pointer of 4 or 8 bytes. + * And a simple bit manipulation can be done, but if for + * some reason struct hlist_head is not a mulitple of 2, + * then we play it safe, and simply count. This function + * is done once at boot up, so it is not that critical in + * performance. + */ + + size--; + size /= sizeof(struct hlist_head); + + for (; size; size >>= 1) + ftrace_profile_bits++; + + pr_info("Function profiler has %d hash buckets\n", + 1 << ftrace_profile_bits); + + return; +} + +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void ftrace_profile_reset(void) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + rec->counter = 0; + } while_for_each_ftrace_rec(); +} + +static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +{ + struct dyn_ftrace *rec; + struct hlist_head *hhd; + struct hlist_node *n; + unsigned long flags; + unsigned long key; + + if (!ftrace_profile_hash) + return NULL; + + key = hash_long(ip, ftrace_profile_bits); + hhd = &ftrace_profile_hash[key]; + + if (hlist_empty(hhd)) + return NULL; + + local_irq_save(flags); + hlist_for_each_entry_rcu(rec, n, hhd, node) { + if (rec->ip == ip) + goto out; + } + rec = NULL; + out: + local_irq_restore(flags); + + return rec; +} + +static void +function_profile_call(unsigned long ip, unsigned long parent_ip) +{ + struct dyn_ftrace *rec; + unsigned long flags; + + if (!ftrace_profile_enabled) + return; + + local_irq_save(flags); + rec = ftrace_find_profiled_func(ip); + if (!rec) + goto out; + + rec->counter++; + out: + local_irq_restore(flags); +} + +static struct ftrace_ops ftrace_profile_ops __read_mostly = +{ + .func = function_profile_call, +}; + +static ssize_t +ftrace_profile_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; + int ret; + + if (!ftrace_profile_hash) { + pr_info("Can not enable hash due to earlier problems\n"); + return -ENODEV; + } + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + + mutex_lock(&ftrace_profile_lock); + if (ftrace_profile_enabled ^ val) { + if (val) { + ftrace_profile_reset(); + register_ftrace_function(&ftrace_profile_ops); + ftrace_profile_enabled = 1; + } else { + ftrace_profile_enabled = 0; + unregister_ftrace_function(&ftrace_profile_ops); + } + } + mutex_unlock(&ftrace_profile_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static const struct file_operations ftrace_profile_fops = { + .open = tracing_open_generic, + .read = ftrace_profile_read, + .write = ftrace_profile_write, +}; + +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + int ret; + + ret = register_stat_tracer(&function_stats); + if (ret) { + pr_warning("Warning: could not register " + "function stats\n"); + return; + } + + entry = debugfs_create_file("function_profile_enabled", 0644, + d_tracer, NULL, &ftrace_profile_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'function_profile_enabled' entry\n"); +} + +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ + unsigned long key; + + if (!ftrace_profile_hash) + return; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ + mutex_lock(&ftrace_profile_lock); + hlist_del(&rec->node); + mutex_unlock(&ftrace_profile_lock); +} + +#else /* CONFIG_FUNCTION_PROFILER */ +static void ftrace_profile_init(int nr_funcs) +{ +} +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ +} +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ +} +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ +} +#endif /* CONFIG_FUNCTION_PROFILER */ + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -359,8 +660,10 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) + !(rec->flags & FTRACE_FL_FREE)) { ftrace_free_rec(rec); + ftrace_profile_release(rec); + } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -414,6 +717,8 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; + ftrace_add_profile(rec); + return rec; } @@ -2157,6 +2462,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + ftrace_profile_debugfs(d_tracer); + return 0; } @@ -2225,6 +2532,8 @@ void __init ftrace_init(void) if (ret) goto failed; + ftrace_profile_init(count); + last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, -- cgit v1.2.3-70-g09d2 From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 17:12:36 -0400 Subject: tracing: move function profiler data out of function struct Impact: reduce size of memory in function profiler The function profiler originally introduces its counters into the function records itself. There is 20 thousand different functions on a normal system, and that is adding 20 thousand counters for profiling event when not needed. A normal run of the profiler yields only a couple of thousand functions executed, depending on what is being profiled. This means we have around 18 thousand useless counters. This patch rectifies this by moving the data out of the function records used by dynamic ftrace. Data is preallocated to hold the functions when the profiling begins. Checks are made during profiling to see if more recorcds should be allocated, and they are allocated if it is safe to do so. This also removes the dependency from using dynamic ftrace, and also removes the overhead by having it enabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 - kernel/trace/Kconfig | 10 +- kernel/trace/ftrace.c | 440 +++++++++++++++++++++++++++++-------------------- 3 files changed, 263 insertions(+), 191 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0456c3a51c6..015a3d22cf7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,10 +153,6 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long counter; - struct hlist_node node; -#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 95e9ad5735d..8a4136096d7 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -379,20 +379,16 @@ config DYNAMIC_FTRACE config FUNCTION_PROFILER bool "Kernel function profiler" - depends on DYNAMIC_FTRACE + depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. When the dynamic - function tracing is enabled, a counter is added into the function - records used by the dynamic function tracer. A file is created in - debugfs called function_profile_enabled which defaults to zero. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. When a 1 is echoed into this file profiling begins, and when a zero is entered, profiling stops. A file in the trace_stats directory called functions, that show the list of functions that have been hit and their counters. - This takes up around 320K more memory. - If in doubt, say N config FTRACE_MCOUNT_RECORD diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 11f364c776d..24dac448cdc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void) #endif } -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; -static struct pid * const ftrace_swapper_pid = &init_struct_pid; - -#ifdef CONFIG_DYNAMIC_FTRACE - -#ifndef CONFIG_FTRACE_MCOUNT_RECORD -# error Dynamic ftrace depends on MCOUNT_RECORD -#endif - -static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; - -struct ftrace_func_probe { - struct hlist_node node; - struct ftrace_probe_ops *ops; - unsigned long flags; - unsigned long ip; - void *data; - struct rcu_head rcu; +#ifdef CONFIG_FUNCTION_PROFILER +struct ftrace_profile { + struct hlist_node node; + unsigned long ip; + unsigned long counter; }; -enum { - FTRACE_ENABLE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_ENABLE_MCOUNT = (1 << 3), - FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), +struct ftrace_profile_page { + struct ftrace_profile_page *next; + unsigned long index; + struct ftrace_profile records[]; }; -static int ftrace_filtered; +#define PROFILE_RECORDS_SIZE \ + (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) -static struct dyn_ftrace *ftrace_new_addrs; +#define PROFILES_PER_PAGE \ + (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static DEFINE_MUTEX(ftrace_regex_lock); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct dyn_ftrace records[]; -}; +/* TODO: make these percpu, to prevent cache line bouncing */ +static struct ftrace_profile_page *profile_pages_start; +static struct ftrace_profile_page *profile_pages; -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -static struct dyn_ftrace *ftrace_free_records; - -/* - * This is a double for. Do not use 'break' to break out of the loop, - * you must use a goto. - */ -#define do_for_each_ftrace_rec(pg, rec) \ - for (pg = ftrace_pages_start; pg; pg = pg->next) { \ - int _____i; \ - for (_____i = 0; _____i < pg->index; _____i++) { \ - rec = &pg->records[_____i]; - -#define while_for_each_ftrace_rec() \ - } \ - } - -#ifdef CONFIG_FUNCTION_PROFILER static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); +static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); + +#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +static raw_spinlock_t ftrace_profile_rec_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static void * function_stat_next(void *v, int idx) { - struct dyn_ftrace *rec = v; - struct ftrace_page *pg; + struct ftrace_profile *rec = v; + struct ftrace_profile_page *pg; - pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); again: rec++; @@ -330,27 +291,22 @@ function_stat_next(void *v, int idx) if (!pg) return NULL; rec = &pg->records[0]; + if (!rec->counter) + goto again; } - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || - !(rec->flags & FTRACE_FL_CONVERTED) || - /* ignore non hit functions */ - !rec->counter) - goto again; - return rec; } static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&ftrace_pages_start->records[0], 0); + return function_stat_next(&profile_pages_start->records[0], 0); } static int function_stat_cmp(void *p1, void *p2) { - struct dyn_ftrace *a = p1; - struct dyn_ftrace *b = p2; + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m) static int function_stat_show(struct seq_file *m, void *v) { - struct dyn_ftrace *rec = v; + struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -387,115 +343,191 @@ static struct tracer_stat function_stats = { .stat_show = function_stat_show }; -static void ftrace_profile_init(int nr_funcs) +static void ftrace_profile_reset(void) { - unsigned long addr; - int order; - int size; + struct ftrace_profile_page *pg; - /* - * We are profiling all functions, lets make it 1/4th of the - * number of functions that are in core kernel. So we have to - * iterate 4 times. - */ - order = (sizeof(struct hlist_head) * nr_funcs) / 4; - order = get_order(order); - size = 1 << (PAGE_SHIFT + order); - - pr_info("Allocating %d KB for profiler hash\n", size >> 10); + pg = profile_pages = profile_pages_start; - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!addr) { - pr_warning("Could not allocate function profiler hash\n"); - return; + while (pg) { + memset(pg->records, 0, PROFILE_RECORDS_SIZE); + pg->index = 0; + pg = pg->next; } - ftrace_profile_hash = (void *)addr; + memset(ftrace_profile_hash, 0, + FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); +} - /* - * struct hlist_head should be a pointer of 4 or 8 bytes. - * And a simple bit manipulation can be done, but if for - * some reason struct hlist_head is not a mulitple of 2, - * then we play it safe, and simply count. This function - * is done once at boot up, so it is not that critical in - * performance. - */ +int ftrace_profile_pages_init(void) +{ + struct ftrace_profile_page *pg; + int i; - size--; - size /= sizeof(struct hlist_head); + /* If we already allocated, do nothing */ + if (profile_pages) + return 0; - for (; size; size >>= 1) - ftrace_profile_bits++; + profile_pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!profile_pages) + return -ENOMEM; - pr_info("Function profiler has %d hash buckets\n", - 1 << ftrace_profile_bits); + pg = profile_pages_start = profile_pages; - return; + /* allocate 10 more pages to start */ + for (i = 0; i < 10; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + /* + * We only care about allocating profile_pages, if + * we failed to allocate here, hopefully we will allocate + * later. + */ + if (!pg->next) + break; + pg = pg->next; + } + + return 0; } -static ssize_t -ftrace_profile_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int ftrace_profile_init(void) { - char buf[64]; - int r; + int size; - r = sprintf(buf, "%u\n", ftrace_profile_enabled); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} + if (ftrace_profile_hash) { + /* If the profile is already created, simply reset it */ + ftrace_profile_reset(); + return 0; + } -static void ftrace_profile_reset(void) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; + /* + * We are profiling all functions, but usually only a few thousand + * functions are hit. We'll make a hash of 1024 items. + */ + size = FTRACE_PROFILE_HASH_SIZE; - do_for_each_ftrace_rec(pg, rec) { - rec->counter = 0; - } while_for_each_ftrace_rec(); + ftrace_profile_hash = + kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + + if (!ftrace_profile_hash) + return -ENOMEM; + + size--; + + for (; size; size >>= 1) + ftrace_profile_bits++; + + /* Preallocate a few pages */ + if (ftrace_profile_pages_init() < 0) { + kfree(ftrace_profile_hash); + ftrace_profile_hash = NULL; + return -ENOMEM; + } + + return 0; } -static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +/* interrupts must be disabled */ +static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; struct hlist_head *hhd; struct hlist_node *n; - unsigned long flags; unsigned long key; - if (!ftrace_profile_hash) - return NULL; - key = hash_long(ip, ftrace_profile_bits); hhd = &ftrace_profile_hash[key]; if (hlist_empty(hhd)) return NULL; - local_irq_save(flags); hlist_for_each_entry_rcu(rec, n, hhd, node) { if (rec->ip == ip) - goto out; + return rec; + } + + return NULL; +} + +static void ftrace_add_profile(struct ftrace_profile *rec) +{ + unsigned long key; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +/* Interrupts must be disabled calling this */ +static struct ftrace_profile * +ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +{ + struct ftrace_profile *rec = NULL; + + /* prevent recursion */ + if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + goto out; + + __raw_spin_lock(&ftrace_profile_rec_lock); + + /* Try to always keep another page available */ + if (!profile_pages->next && alloc_safe) + profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + + /* + * Try to find the function again since another + * task on another CPU could have added it + */ + rec = ftrace_find_profiled_func(ip); + if (rec) + goto out_unlock; + + if (profile_pages->index == PROFILES_PER_PAGE) { + if (!profile_pages->next) + goto out_unlock; + profile_pages = profile_pages->next; } - rec = NULL; + + rec = &profile_pages->records[profile_pages->index++]; + rec->ip = ip; + ftrace_add_profile(rec); + + out_unlock: + __raw_spin_unlock(&ftrace_profile_rec_lock); out: - local_irq_restore(flags); + atomic_dec(&__get_cpu_var(ftrace_profile_disable)); return rec; } +/* + * If we are not in an interrupt, or softirq and + * and interrupts are disabled and preemption is not enabled + * (not in a spinlock) then it should be safe to allocate memory. + */ +static bool ftrace_safe_to_allocate(void) +{ + return !in_interrupt() && irqs_disabled() && !preempt_count(); +} + static void function_profile_call(unsigned long ip, unsigned long parent_ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; unsigned long flags; + bool alloc_safe; if (!ftrace_profile_enabled) return; + alloc_safe = ftrace_safe_to_allocate(); + local_irq_save(flags); rec = ftrace_find_profiled_func(ip); - if (!rec) - goto out; + if (!rec) { + rec = ftrace_profile_alloc(ip, alloc_safe); + if (!rec) + goto out; + } rec->counter++; out: @@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, char buf[64]; int ret; - if (!ftrace_profile_hash) { - pr_info("Can not enable hash due to earlier problems\n"); - return -ENODEV; - } - if (cnt >= sizeof(buf)) return -EINVAL; @@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, mutex_lock(&ftrace_profile_lock); if (ftrace_profile_enabled ^ val) { if (val) { - ftrace_profile_reset(); + ret = ftrace_profile_init(); + if (ret < 0) { + cnt = ret; + goto out; + } + register_ftrace_function(&ftrace_profile_ops); ftrace_profile_enabled = 1; } else { @@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, unregister_ftrace_function(&ftrace_profile_ops); } } + out: mutex_unlock(&ftrace_profile_lock); filp->f_pos += cnt; @@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct file_operations ftrace_profile_fops = { .open = tracing_open_generic, .read = ftrace_profile_read, @@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) "'function_profile_enabled' entry\n"); } -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ - unsigned long key; - - if (!ftrace_profile_hash) - return; - - key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); -} - -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ - mutex_lock(&ftrace_profile_lock); - hlist_del(&rec->node); - mutex_unlock(&ftrace_profile_lock); -} - #else /* CONFIG_FUNCTION_PROFILER */ -static void ftrace_profile_init(int nr_funcs) -{ -} -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ -} static void ftrace_profile_debugfs(struct dentry *d_tracer) { } -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ -} #endif /* CONFIG_FUNCTION_PROFILER */ +/* set when tracing only a pid */ +struct pid *ftrace_pid_trace; +static struct pid * const ftrace_swapper_pid = &init_struct_pid; + +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifndef CONFIG_FTRACE_MCOUNT_RECORD +# error Dynamic ftrace depends on MCOUNT_RECORD +#endif + +static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +struct ftrace_func_probe { + struct hlist_node node; + struct ftrace_probe_ops *ops; + unsigned long flags; + unsigned long ip; + void *data; + struct rcu_head rcu; +}; + +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), +}; + +static int ftrace_filtered; + +static struct dyn_ftrace *ftrace_new_addrs; + +static DEFINE_MUTEX(ftrace_regex_lock); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct dyn_ftrace records[]; +}; + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +static struct dyn_ftrace *ftrace_free_records; + +/* + * This is a double for. Do not use 'break' to break out of the loop, + * you must use a goto. + */ +#define do_for_each_ftrace_rec(pg, rec) \ + for (pg = ftrace_pages_start; pg; pg = pg->next) { \ + int _____i; \ + for (_____i = 0; _____i < pg->index; _____i++) { \ + rec = &pg->records[_____i]; + +#define while_for_each_ftrace_rec() \ + } \ + } + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) { + !(rec->flags & FTRACE_FL_FREE)) ftrace_free_rec(rec); - ftrace_profile_release(rec); - } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; - ftrace_add_profile(rec); - return rec; } @@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - ftrace_profile_debugfs(d_tracer); - return 0; } @@ -2532,8 +2611,6 @@ void __init ftrace_init(void) if (ret) goto failed; - ftrace_profile_init(count); - last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, @@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_pid' entry\n"); + + ftrace_profile_debugfs(d_tracer); + return 0; } fs_initcall(ftrace_init_debugfs); -- cgit v1.2.3-70-g09d2 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 21 ++++++++++++++++++++- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 8 ++++---- 5 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf7..9e0a8d245e5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ed1fc5021d4..71e5faef12a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { struct ftrace_profile_stat *stat; + unsigned long long calltime; struct ftrace_profile *rec; unsigned long flags; @@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash) goto out; + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) - rec->time += trace->rettime - trace->calltime; + rec->time += calltime; + out: local_irq_restore(flags); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 821bf49771d..5d1a16cae37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +318,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c66ca3b6605..e3429a8ab05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 85bba0f018b..10f6ad7d85f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); -- cgit v1.2.3-70-g09d2 From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:24:03 -0800 Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch Impact: simplification, prepare for later changes Make lazy cpu mode more specific to context switching, so that it makes sense to do more context-switch specific things in the callbacks. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 8 +++----- arch/x86/kernel/paravirt.c | 13 ------------- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/xen/mmu.c | 5 +---- include/asm-frv/pgtable.h | 4 ++-- include/asm-generic/pgtable.h | 21 +++++++++++---------- kernel/sched.c | 2 +- 8 files changed, 20 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0617d5cc971..7b28abac323 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); void paravirt_leave_lazy(enum paravirt_lazy_mode mode); -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -static inline void arch_enter_lazy_cpu_mode(void) +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); } -static inline void arch_leave_lazy_cpu_mode(void) +static inline void arch_end_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -void arch_flush_lazy_cpu_mode(void); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8ab250ac498..5eea9548216 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -301,19 +301,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_enable(); } -void arch_flush_lazy_cpu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { - WARN_ON(preempt_count() == 1); - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d766ca..57e49a8278a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a7f0c..7115e608532 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* * Switch FS and GS. diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4ec95..6b98f87232a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1119,10 +1119,8 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); - arch_flush_lazy_cpu_mode(); - } } static void xen_drop_mm_ref(struct mm_struct *mm) @@ -1135,7 +1133,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) load_cr3(swapper_pg_dir); else leave_mm(smp_processor_id()); - arch_flush_lazy_cpu_mode(); } /* Get the "official" set of cpus referring to our pagetable. */ diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index e16fdb1f4f4..235e34a7a34 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; } #define pgtable_cache_init() do {} while (0) #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) + +#define arch_start_context_switch() do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70ab..922f03671dd 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch() do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 5757e03cfac..7530fdd7c98 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_enter_lazy_cpu_mode(); + arch_start_context_switch(); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v1.2.3-70-g09d2 From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 11:18:57 -0800 Subject: x86/paravirt: finish change from lazy cpu to context switch start/end Impact: fix lazy context switch API Pass the previous and next tasks into the context switch start end calls, so that the called functions can properly access the task state (esp in end_context_switch, in which the next task is not yet completely current). Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 17 ++++++++++------- arch/x86/include/asm/pgtable.h | 2 ++ arch/x86/kernel/paravirt.c | 14 ++++++-------- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vmi_32.c | 12 ++++++------ arch/x86/lguest/boot.c | 8 ++++---- arch/x86/xen/enlighten.c | 10 ++++------ include/asm-frv/pgtable.h | 2 +- include/asm-generic/pgtable.h | 2 +- kernel/sched.c | 2 +- 11 files changed, 37 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 58d2481b01a..dfdee0ca57d 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -56,6 +56,7 @@ struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; +struct task_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -203,7 +204,8 @@ struct pv_cpu_ops { void (*swapgs)(void); - struct pv_lazy_ops lazy_mode; + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); }; struct pv_irq_ops { @@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode { }; enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_enter_lazy_cpu(void); -void paravirt_leave_lazy_cpu(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); #define __HAVE_ARCH_START_CONTEXT_SWITCH -static inline void arch_start_context_switch(void) +static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); } -static inline void arch_end_context_switch(void) +static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d0812e155f1..24e42836e92 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) +#define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT */ /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 430a0e30577..cf1437503ba 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -270,20 +270,20 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } -void paravirt_enter_lazy_cpu(void) +void paravirt_start_context_switch(struct task_struct *prev) { if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); - set_thread_flag(TIF_LAZY_MMU_UPDATES); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); } enter_lazy(PARAVIRT_LAZY_CPU); } -void paravirt_leave_lazy_cpu(void) +void paravirt_end_context_switch(struct task_struct *next) { leave_lazy(PARAVIRT_LAZY_CPU); - if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES)) + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) arch_enter_lazy_mmu_mode(); } @@ -399,10 +399,8 @@ struct pv_cpu_ops pv_cpu_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .lazy_mode = { - .enter = paravirt_nop, - .leave = paravirt_nop, - }, + .start_context_switch = paravirt_nop, + .end_context_switch = paravirt_nop, }; struct pv_apic_ops pv_apic_ops = { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 57e49a8278a..d766c7616fd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7115e608532..e8a9aaf9df8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* * Switch FS and GS. diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 950929c607d..55a5d6938e5 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -467,16 +467,16 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_enter_lazy_cpu(void) +static void vmi_start_context_switch(struct task_struct *prev) { - paravirt_enter_lazy_cpu(); + paravirt_start_context_switch(prev); vmi_ops.set_lazy_mode(2); } -static void vmi_leave_lazy_cpu(void) +static void vmi_end_context_switch(struct task_struct *next) { vmi_ops.set_lazy_mode(0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static void vmi_enter_lazy_mmu(void) @@ -722,9 +722,9 @@ static inline int __init activate_vmi(void) para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.io_delay, IODelay); - para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu, + para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 41a5562e710..5287081b356 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -153,10 +153,10 @@ static void lguest_leave_lazy_mmu_mode(void) paravirt_leave_lazy_mmu(); } -static void lguest_leave_lazy_cpu_mode(void) +static void lguest_end_context_switch(struct task_struct *next) { hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } /*G:033 @@ -1031,8 +1031,8 @@ __init void lguest_init(void) pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; - pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode; + pv_cpu_ops.start_context_switch = paravirt_start_context_switch; + pv_cpu_ops.end_context_switch = lguest_end_context_switch; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f586e63b9a6..70b355d3a86 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy_cpu(void) +static void xen_end_context_switch(struct task_struct *next) { xen_mc_flush(); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static unsigned long xen_store_tr(void) @@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { /* Xen takes care of %gs when switching to usermode for us */ .swapgs = paravirt_nop, - .lazy_mode = { - .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy_cpu, - }, + .start_context_switch = paravirt_start_context_switch, + .end_context_switch = xen_end_context_switch, }; static const struct pv_apic_ops xen_apic_ops __initdata = { diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 235e34a7a34..09887045d03 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; } #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 922f03671dd..e410f602cab 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, * definition. */ #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 7530fdd7c98..133762aece5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_start_context_switch(); + arch_start_context_switch(prev); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v1.2.3-70-g09d2 From d4c045364d3107603187f21a56ec231e74d26441 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:20:31 -0800 Subject: xen: add irq_from_evtchn Given an evtchn, return the corresponding irq. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/events.c | 6 ++++++ include/xen/events.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af5dba..1cd2a0e15ae 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq) return info_for_irq(irq)->evtchn; } +unsigned irq_from_evtchn(unsigned int evtchn) +{ + return evtchn_to_irq[evtchn]; +} +EXPORT_SYMBOL_GPL(irq_from_evtchn); + static enum ipi_vector ipi_from_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc036..e68d59a90ca 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Determine the IRQ which is bound to an event channel */ +unsigned irq_from_evtchn(unsigned int evtchn); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3-70-g09d2 From f7116284c734f3a47180cd9c907944a1837ccb3c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: add /dev/xen/evtchn driver This driver is used by application which wish to receive notifications from the hypervisor or other guests via Xen's event channel mechanism. In particular it is used by the xenstore daemon in domain 0. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/Kconfig | 10 ++ drivers/xen/Makefile | 3 +- drivers/xen/evtchn.c | 494 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/xen/evtchn.h | 88 +++++++++ 4 files changed, 594 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/evtchn.c create mode 100644 include/xen/evtchn.h (limited to 'include') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 526187c8a12..1bbb9108f31 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES secure, but slightly less efficient. If in doubt, say yes. +config XEN_DEV_EVTCHN + tristate "Xen /dev/xen/evtchn device" + depends on XEN + default y + help + The evtchn driver allows a userspace process to triger event + channels and to receive notification of an event channel + firing. + If in doubt, say yes. + config XENFS tristate "Xen filesystem" depends on XEN diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc9e10..1567639847e 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -4,4 +4,5 @@ obj-y += xenbus/ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file +obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o +obj-$(CONFIG_XENFS) += xenfs/ diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c new file mode 100644 index 00000000000..517b9ee63e1 --- /dev/null +++ b/drivers/xen/evtchn.c @@ -0,0 +1,494 @@ +/****************************************************************************** + * evtchn.c + * + * Driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004-2005, K A Fraser + * Multi-process extensions Copyright (c) 2004, Steven Smith + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct per_user_data { + /* Notification ring, accessed via /dev/xen/evtchn. */ +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + evtchn_port_t *ring; + unsigned int ring_cons, ring_prod, ring_overflow; + struct mutex ring_cons_mutex; /* protect against concurrent readers */ + + /* Processes wait on this queue when ring is empty. */ + wait_queue_head_t evtchn_wait; + struct fasync_struct *evtchn_async_queue; + const char *name; +}; + +/* Who's bound to each port? */ +static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +static DEFINE_SPINLOCK(port_user_lock); + +irqreturn_t evtchn_interrupt(int irq, void *data) +{ + unsigned int port = (unsigned long)data; + struct per_user_data *u; + + spin_lock(&port_user_lock); + + u = port_user[port]; + + disable_irq_nosync(irq); + + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + wmb(); /* Ensure ring contents visible */ + if (u->ring_cons == u->ring_prod++) { + wake_up_interruptible(&u->evtchn_wait); + kill_fasync(&u->evtchn_async_queue, + SIGIO, POLL_IN); + } + } else { + u->ring_overflow = 1; + } + + spin_unlock(&port_user_lock); + + return IRQ_HANDLED; +} + +static ssize_t evtchn_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int rc; + unsigned int c, p, bytes1 = 0, bytes2 = 0; + struct per_user_data *u = file->private_data; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + if (count == 0) + return 0; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + for (;;) { + mutex_lock(&u->ring_cons_mutex); + + rc = -EFBIG; + if (u->ring_overflow) + goto unlock_out; + + c = u->ring_cons; + p = u->ring_prod; + if (c != p) + break; + + mutex_unlock(&u->ring_cons_mutex); + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(u->evtchn_wait, + u->ring_cons != u->ring_prod); + if (rc) + return rc; + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + sizeof(evtchn_port_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + } else { + bytes1 = (p - c) * sizeof(evtchn_port_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if (bytes1 > count) { + bytes1 = count; + bytes2 = 0; + } else if ((bytes1 + bytes2) > count) { + bytes2 = count - bytes1; + } + + rc = -EFAULT; + rmb(); /* Ensure that we see the port before we copy it. */ + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + ((bytes2 != 0) && + copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) + goto unlock_out; + + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); + rc = bytes1 + bytes2; + + unlock_out: + mutex_unlock(&u->ring_cons_mutex); + return rc; +} + +static ssize_t evtchn_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int rc, i; + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + struct per_user_data *u = file->private_data; + + if (kbuf == NULL) + return -ENOMEM; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + rc = 0; + if (count == 0) + goto out; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + rc = -EFAULT; + if (copy_from_user(kbuf, buf, count) != 0) + goto out; + + spin_lock_irq(&port_user_lock); + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) + enable_irq(irq_from_evtchn(kbuf[i])); + spin_unlock_irq(&port_user_lock); + + rc = count; + + out: + free_page((unsigned long)kbuf); + return rc; +} + +static int evtchn_bind_to_user(struct per_user_data *u, int port) +{ + int irq; + int rc = 0; + + spin_lock_irq(&port_user_lock); + + BUG_ON(port_user[port] != NULL); + + irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, + u->name, (void *)(unsigned long)port); + if (rc < 0) + goto fail; + + port_user[port] = u; + +fail: + spin_unlock_irq(&port_user_lock); + return rc; +} + +static void evtchn_unbind_from_user(struct per_user_data *u, int port) +{ + int irq = irq_from_evtchn(port); + + unbind_from_irqhandler(irq, (void *)(unsigned long)port); + port_user[port] = NULL; +} + +static long evtchn_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + int rc; + struct per_user_data *u = file->private_data; + void __user *uarg = (void __user *) arg; + + switch (cmd) { + case IOCTL_EVTCHN_BIND_VIRQ: { + struct ioctl_evtchn_bind_virq bind; + struct evtchn_bind_virq bind_virq; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_virq.virq = bind.virq; + bind_virq.vcpu = 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_virq.port); + if (rc == 0) + rc = bind_virq.port; + break; + } + + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { + struct ioctl_evtchn_bind_interdomain bind; + struct evtchn_bind_interdomain bind_interdomain; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_interdomain.remote_dom = bind.remote_domain; + bind_interdomain.remote_port = bind.remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_interdomain.local_port); + if (rc == 0) + rc = bind_interdomain.local_port; + break; + } + + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { + struct ioctl_evtchn_bind_unbound_port bind; + struct evtchn_alloc_unbound alloc_unbound; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = bind.remote_domain; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, alloc_unbound.port); + if (rc == 0) + rc = alloc_unbound.port; + break; + } + + case IOCTL_EVTCHN_UNBIND: { + struct ioctl_evtchn_unbind unbind; + + rc = -EFAULT; + if (copy_from_user(&unbind, uarg, sizeof(unbind))) + break; + + rc = -EINVAL; + if (unbind.port >= NR_EVENT_CHANNELS) + break; + + spin_lock_irq(&port_user_lock); + + rc = -ENOTCONN; + if (port_user[unbind.port] != u) { + spin_unlock_irq(&port_user_lock); + break; + } + + evtchn_unbind_from_user(u, unbind.port); + + spin_unlock_irq(&port_user_lock); + + rc = 0; + break; + } + + case IOCTL_EVTCHN_NOTIFY: { + struct ioctl_evtchn_notify notify; + + rc = -EFAULT; + if (copy_from_user(¬ify, uarg, sizeof(notify))) + break; + + if (notify.port >= NR_EVENT_CHANNELS) { + rc = -EINVAL; + } else if (port_user[notify.port] != u) { + rc = -ENOTCONN; + } else { + notify_remote_via_evtchn(notify.port); + rc = 0; + } + break; + } + + case IOCTL_EVTCHN_RESET: { + /* Initialise the ring to empty. Clear errors. */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&port_user_lock); + u->ring_cons = u->ring_prod = u->ring_overflow = 0; + spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->ring_cons_mutex); + rc = 0; + break; + } + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +static unsigned int evtchn_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = POLLOUT | POLLWRNORM; + struct per_user_data *u = file->private_data; + + poll_wait(file, &u->evtchn_wait, wait); + if (u->ring_cons != u->ring_prod) + mask |= POLLIN | POLLRDNORM; + if (u->ring_overflow) + mask = POLLERR; + return mask; +} + +static int evtchn_fasync(int fd, struct file *filp, int on) +{ + struct per_user_data *u = filp->private_data; + return fasync_helper(fd, filp, on, &u->evtchn_async_queue); +} + +static int evtchn_open(struct inode *inode, struct file *filp) +{ + struct per_user_data *u; + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); + if (u->name == NULL) { + kfree(u); + return -ENOMEM; + } + + init_waitqueue_head(&u->evtchn_wait); + + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + if (u->ring == NULL) { + kfree(u->name); + kfree(u); + return -ENOMEM; + } + + mutex_init(&u->ring_cons_mutex); + + filp->private_data = u; + + return 0; +} + +static int evtchn_release(struct inode *inode, struct file *filp) +{ + int i; + struct per_user_data *u = filp->private_data; + + spin_lock_irq(&port_user_lock); + + free_page((unsigned long)u->ring); + + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (port_user[i] != u) + continue; + + evtchn_unbind_from_user(port_user[i], i); + } + + spin_unlock_irq(&port_user_lock); + + kfree(u->name); + kfree(u); + + return 0; +} + +static const struct file_operations evtchn_fops = { + .owner = THIS_MODULE, + .read = evtchn_read, + .write = evtchn_write, + .unlocked_ioctl = evtchn_ioctl, + .poll = evtchn_poll, + .fasync = evtchn_fasync, + .open = evtchn_open, + .release = evtchn_release, +}; + +static struct miscdevice evtchn_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "evtchn", + .fops = &evtchn_fops, +}; +static int __init evtchn_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + spin_lock_init(&port_user_lock); + memset(port_user, 0, sizeof(port_user)); + + /* Create '/dev/misc/evtchn'. */ + err = misc_register(&evtchn_miscdev); + if (err != 0) { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + printk(KERN_INFO "Event-channel device installed.\n"); + + return 0; +} + +static void __exit evtchn_cleanup(void) +{ + misc_deregister(&evtchn_miscdev); +} + +module_init(evtchn_init); +module_exit(evtchn_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 00000000000..14e833ee4e0 --- /dev/null +++ b/include/xen/evtchn.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ -- cgit v1.2.3-70-g09d2 From c5cfef0f79cacc3aa438fc28f4747f0d10c54d0d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: export ioctl headers to userspace Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- include/Kbuild | 1 + include/xen/Kbuild | 1 + 2 files changed, 2 insertions(+) create mode 100644 include/xen/Kbuild (limited to 'include') diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf41..fe36accd432 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -8,3 +8,4 @@ header-y += mtd/ header-y += rdma/ header-y += video/ header-y += drm/ +header-y += xen/ diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 00000000000..4e65c16a445 --- /dev/null +++ b/include/xen/Kbuild @@ -0,0 +1 @@ +header-y += evtchn.h -- cgit v1.2.3-70-g09d2 From a1ce1be578365a4da7e7d7db4812539d2d5da763 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: remove suspend_cancel hook Remove suspend_cancel hook from xenbus_driver, in preparation for using the device model for suspending. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenbus/xenbus_probe.c | 23 ----------------------- include/xen/xenbus.h | 1 - 2 files changed, 24 deletions(-) (limited to 'include') diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 773d1cf2328..bd20361fb09 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -689,27 +689,6 @@ static int suspend_dev(struct device *dev, void *data) return 0; } -static int suspend_cancel_dev(struct device *dev, void *data) -{ - int err = 0; - struct xenbus_driver *drv; - struct xenbus_device *xdev; - - DPRINTK(""); - - if (dev->driver == NULL) - return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - if (drv->suspend_cancel) - err = drv->suspend_cancel(xdev); - if (err) - printk(KERN_WARNING - "xenbus: suspend_cancel %s failed: %i\n", - dev_name(dev), err); - return 0; -} - static int resume_dev(struct device *dev, void *data) { int err; @@ -777,8 +756,6 @@ EXPORT_SYMBOL_GPL(xenbus_resume); void xenbus_suspend_cancel(void) { xs_suspend_cancel(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); - xenbus_backend_resume(suspend_cancel_dev); } EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844..0836772b968 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -92,7 +92,6 @@ struct xenbus_driver { enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v1.2.3-70-g09d2 From de5b31bd47de7e6f41be2e271318dbc8f1af354d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: use device model for suspending xenbus devices Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/manage.c | 9 ++++----- drivers/xen/xenbus/xenbus_probe.c | 37 +++++++++---------------------------- drivers/xen/xenbus/xenbus_xs.c | 2 ++ include/xen/xenbus.h | 2 +- 4 files changed, 16 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index b703dd2c9f1..5269bb4d249 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -104,9 +104,8 @@ static void do_suspend(void) goto out; } - printk("suspending xenbus...\n"); - /* XXX use normal device tree? */ - xenbus_suspend(); + printk(KERN_DEBUG "suspending xenstore...\n"); + xs_suspend(); err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); if (err) { @@ -116,9 +115,9 @@ static void do_suspend(void) if (!cancelled) { xen_arch_resume(); - xenbus_resume(); + xs_resume(); } else - xenbus_suspend_cancel(); + xs_suspend_cancel(); device_resume(PMSG_RESUME); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index bd20361fb09..4649213bed9 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name); static void xenbus_dev_shutdown(struct device *_dev); +static int xenbus_dev_suspend(struct device *dev, pm_message_t state); +static int xenbus_dev_resume(struct device *dev); + /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = { .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, .dev_attrs = xenbus_dev_attrs, + + .suspend = xenbus_dev_suspend, + .resume = xenbus_dev_resume, }, }; @@ -669,7 +675,7 @@ static struct xenbus_watch fe_watch = { .callback = frontend_changed, }; -static int suspend_dev(struct device *dev, void *data) +static int xenbus_dev_suspend(struct device *dev, pm_message_t state) { int err = 0; struct xenbus_driver *drv; @@ -682,14 +688,14 @@ static int suspend_dev(struct device *dev, void *data) drv = to_xenbus_driver(dev->driver); xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) - err = drv->suspend(xdev); + err = drv->suspend(xdev, state); if (err) printk(KERN_WARNING "xenbus: suspend %s failed: %i\n", dev_name(dev), err); return 0; } -static int resume_dev(struct device *dev, void *data) +static int xenbus_dev_resume(struct device *dev) { int err; struct xenbus_driver *drv; @@ -734,31 +740,6 @@ static int resume_dev(struct device *dev, void *data) return 0; } -void xenbus_suspend(void) -{ - DPRINTK(""); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); - xenbus_backend_suspend(suspend_dev); - xs_suspend(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend); - -void xenbus_resume(void) -{ - xb_init_comms(); - xs_resume(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); - xenbus_backend_resume(resume_dev); -} -EXPORT_SYMBOL_GPL(xenbus_resume); - -void xenbus_suspend_cancel(void) -{ - xs_suspend_cancel(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); - /* A flag to determine if xenstored is 'ready' (i.e. has started) */ int xenstored_ready = 0; diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e325eab4724..eab33f1dbdf 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -673,6 +673,8 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; + xb_init_comms(); + mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); up_write(&xs_state.transaction_mutex); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0836772b968..b9763badbd7 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -91,7 +91,7 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v1.2.3-70-g09d2 From cff7e81b3dd7c25cd2248cd7a04c5764552d5d55 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 10 Mar 2009 14:39:59 -0700 Subject: xen: add /sys/hypervisor support Adds support for Xen info under /sys/hypervisor. Taken from Novell 2.6.27 backport tree. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/Kconfig | 10 + drivers/xen/Makefile | 3 +- drivers/xen/sys-hypervisor.c | 475 ++++++++++++++++++++++++++++++++++++++++ include/xen/interface/version.h | 3 + 4 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/sys-hypervisor.c (limited to 'include') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 526187c8a12..88bca1c42db 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -41,3 +41,13 @@ config XEN_COMPAT_XENFS a xen platform. If in doubt, say yes. +config XEN_SYS_HYPERVISOR + bool "Create xen entries under /sys/hypervisor" + depends on XEN && SYSFS + select SYS_HYPERVISOR + default y + help + Create entries under /sys/hypervisor describing the Xen + hypervisor environment. When running native or in another + virtual environment, /sys/hypervisor will still be present, + but will have no xen contents. \ No newline at end of file diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc9e10..f3603a39db5 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -4,4 +4,5 @@ obj-y += xenbus/ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file +obj-$(CONFIG_XENFS) += xenfs/ +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c new file mode 100644 index 00000000000..cb29d1ccf0d --- /dev/null +++ b/drivers/xen/sys-hypervisor.c @@ -0,0 +1,475 @@ +/* + * copyright (c) 2006 IBM Corporation + * Authored by: Mike D. Day + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define HYPERVISOR_ATTR_RO(_name) \ +static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) + +#define HYPERVISOR_ATTR_RW(_name) \ +static struct hyp_sysfs_attr _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +struct hyp_sysfs_attr { + struct attribute attr; + ssize_t (*show)(struct hyp_sysfs_attr *, char *); + ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); + void *hyp_attr_data; +}; + +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return sprintf(buffer, "xen\n"); +} + +HYPERVISOR_ATTR_RO(type); + +static int __init xen_sysfs_type_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &type_attr.attr); +} + +static void xen_sysfs_type_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &type_attr.attr); +} + +/* xen version attributes */ +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version >> 16); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(major); + +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version & 0xff); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(minor); + +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *extra; + + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); + if (extra) { + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); + if (!ret) + ret = sprintf(buffer, "%s\n", extra); + kfree(extra); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(extra); + +static struct attribute *version_attrs[] = { + &major_attr.attr, + &minor_attr.attr, + &extra_attr.attr, + NULL +}; + +static struct attribute_group version_group = { + .name = "version", + .attrs = version_attrs, +}; + +static int __init xen_sysfs_version_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &version_group); +} + +static void xen_sysfs_version_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &version_group); +} + +/* UUID */ + +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + char *vm, *val; + int ret; + extern int xenstored_ready; + + if (!xenstored_ready) + return -EBUSY; + + vm = xenbus_read(XBT_NIL, "vm", "", NULL); + if (IS_ERR(vm)) + return PTR_ERR(vm); + val = xenbus_read(XBT_NIL, vm, "uuid", NULL); + kfree(vm); + if (IS_ERR(val)) + return PTR_ERR(val); + ret = sprintf(buffer, "%s\n", val); + kfree(val); + return ret; +} + +HYPERVISOR_ATTR_RO(uuid); + +static int __init xen_sysfs_uuid_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); +} + +static void xen_sysfs_uuid_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); +} + +/* xen compilation attributes */ + +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compiler); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiler); + +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_by); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiled_by); + +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_date); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compile_date); + +static struct attribute *xen_compile_attrs[] = { + &compiler_attr.attr, + &compiled_by_attr.attr, + &compile_date_attr.attr, + NULL +}; + +static struct attribute_group xen_compilation_group = { + .name = "compilation", + .attrs = xen_compile_attrs, +}; + +int __init static xen_compilation_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); +} + +static void xen_compilation_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); +} + +/* xen properties info */ + +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *caps; + + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); + if (caps) { + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); + if (!ret) + ret = sprintf(buffer, "%s\n", caps); + kfree(caps); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(capabilities); + +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *cset; + + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); + if (cset) { + ret = HYPERVISOR_xen_version(XENVER_changeset, cset); + if (!ret) + ret = sprintf(buffer, "%s\n", cset); + kfree(cset); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(changeset); + +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_platform_parameters *parms; + + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); + if (parms) { + ret = HYPERVISOR_xen_version(XENVER_platform_parameters, + parms); + if (!ret) + ret = sprintf(buffer, "%lx\n", parms->virt_start); + kfree(parms); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(virtual_start); + +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret; + + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); + if (ret > 0) + ret = sprintf(buffer, "%x\n", ret); + + return ret; +} + +HYPERVISOR_ATTR_RO(pagesize); + +/* eventually there will be several more features to export */ +static ssize_t xen_feature_show(int index, char *buffer) +{ + int ret = -ENOMEM; + struct xen_feature_info *info; + + info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL); + if (info) { + info->submap_idx = index; + ret = HYPERVISOR_xen_version(XENVER_get_features, info); + if (!ret) + ret = sprintf(buffer, "%d\n", info->submap); + kfree(info); + } + + return ret; +} + +static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return xen_feature_show(XENFEAT_writable_page_tables, buffer); +} + +HYPERVISOR_ATTR_RO(writable_pt); + +static struct attribute *xen_properties_attrs[] = { + &capabilities_attr.attr, + &changeset_attr.attr, + &virtual_start_attr.attr, + &pagesize_attr.attr, + &writable_pt_attr.attr, + NULL +}; + +static struct attribute_group xen_properties_group = { + .name = "properties", + .attrs = xen_properties_attrs, +}; + +static int __init xen_properties_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_properties_group); +} + +static void xen_properties_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_properties_group); +} + +#ifdef CONFIG_KEXEC + +extern size_t vmcoreinfo_size_xen; +extern unsigned long paddr_vmcoreinfo_xen; + +static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page) +{ + return sprintf(page, "%lx %zx\n", + paddr_vmcoreinfo_xen, vmcoreinfo_size_xen); +} + +HYPERVISOR_ATTR_RO(vmcoreinfo); + +static int __init xen_sysfs_vmcoreinfo_init(void) +{ + return sysfs_create_file(hypervisor_kobj, + &vmcoreinfo_attr.attr); +} + +static void xen_sysfs_vmcoreinfo_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr); +} + +#endif + +static int __init hyper_sysfs_init(void) +{ + int ret; + + if (!xen_domain()) + return -ENODEV; + + ret = xen_sysfs_type_init(); + if (ret) + goto out; + ret = xen_sysfs_version_init(); + if (ret) + goto version_out; + ret = xen_compilation_init(); + if (ret) + goto comp_out; + ret = xen_sysfs_uuid_init(); + if (ret) + goto uuid_out; + ret = xen_properties_init(); + if (ret) + goto prop_out; +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) { + ret = xen_sysfs_vmcoreinfo_init(); + if (ret) + goto vmcoreinfo_out; + } +#endif + + goto out; + +#ifdef CONFIG_KEXEC +vmcoreinfo_out: +#endif + xen_properties_destroy(); +prop_out: + xen_sysfs_uuid_destroy(); +uuid_out: + xen_compilation_destroy(); +comp_out: + xen_sysfs_version_destroy(); +version_out: + xen_sysfs_type_destroy(); +out: + return ret; +} + +static void __exit hyper_sysfs_exit(void) +{ +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) + xen_sysfs_vmcoreinfo_destroy(); +#endif + xen_properties_destroy(); + xen_compilation_destroy(); + xen_sysfs_uuid_destroy(); + xen_sysfs_version_destroy(); + xen_sysfs_type_destroy(); + +} +module_init(hyper_sysfs_init); +module_exit(hyper_sysfs_exit); + +static ssize_t hyp_sysfs_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->show) + return hyp_attr->show(hyp_attr, buffer); + return 0; +} + +static ssize_t hyp_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t len) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->store) + return hyp_attr->store(hyp_attr, buffer, len); + return 0; +} + +static struct sysfs_ops hyp_sysfs_ops = { + .show = hyp_sysfs_show, + .store = hyp_sysfs_store, +}; + +static struct kobj_type hyp_sysfs_kobj_type = { + .sysfs_ops = &hyp_sysfs_ops, +}; + +static int __init hypervisor_subsys_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; + return 0; +} +device_initcall(hypervisor_subsys_init); diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f..e8b6519d47e 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -57,4 +57,7 @@ struct xen_feature_info { /* Declares the features reported by XENVER_get_features. */ #include "features.h" +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + #endif /* __XEN_PUBLIC_VERSION_H__ */ -- cgit v1.2.3-70-g09d2 From 15dbf27cc18559a14e99609f78678aa86b9c6ff1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:32 +0100 Subject: perf_counter: software counter event infrastructure Provide generic software counter infrastructure that supports software events. This will be used to allow sample based profiling based on software events such as pagefaults. The current infrastructure can only provide a count of such events, no place information. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +- kernel/perf_counter.c | 201 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dde564517b6..3fefc3b8150 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -126,6 +126,7 @@ struct hw_perf_counter { unsigned long counter_base; int nmi; unsigned int idx; + atomic64_t count; /* software */ atomic64_t prev_count; u64 irq_period; atomic64_t period_left; @@ -283,6 +284,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -295,10 +298,13 @@ static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } -static inline void hw_perf_restore(u64 ctrl) { } +static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } + +static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0fe22c916e2..eeb1b46cf70 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1328,6 +1328,185 @@ static const struct file_operations perf_fops = { .compat_ioctl = perf_ioctl, }; +/* + * Generic software counter infrastructure + */ + +static void perf_swcounter_update(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + u64 prev, now; + s64 delta; + +again: + prev = atomic64_read(&hwc->prev_count); + now = atomic64_read(&hwc->count); + if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) + goto again; + + delta = now - prev; + + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &hwc->period_left); +} + +static void perf_swcounter_set_period(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->irq_period; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_add(period, &hwc->period_left); + } + + atomic64_set(&hwc->prev_count, -left); + atomic64_set(&hwc->count, -left); +} + +static void perf_swcounter_save_and_restart(struct perf_counter *counter) +{ + perf_swcounter_update(counter); + perf_swcounter_set_period(counter); +} + +static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +static void perf_swcounter_handle_group(struct perf_counter *sibling) +{ + struct perf_counter *counter, *group_leader = sibling->group_leader; + + list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + perf_swcounter_update(counter); + perf_swcounter_store_irq(sibling, counter->hw_event.type); + perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); + } +} + +static void perf_swcounter_interrupt(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_swcounter_save_and_restart(counter); + + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + break; + + case PERF_RECORD_IRQ: + perf_swcounter_store_irq(counter, instruction_pointer(regs)); + break; + + case PERF_RECORD_GROUP: + perf_swcounter_handle_group(counter); + break; + } + + if (nmi) { + counter->wakeup_pending = 1; + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + } else + wake_up(&counter->waitq); +} + +static int perf_swcounter_match(struct perf_counter *counter, + enum hw_event_types event, + struct pt_regs *regs) +{ + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + return 0; + + if (counter->hw_event.raw) + return 0; + + if (counter->hw_event.type != event) + return 0; + + if (counter->hw_event.exclude_user && user_mode(regs)) + return 0; + + if (counter->hw_event.exclude_kernel && !user_mode(regs)) + return 0; + + return 1; +} + +static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, + enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) +{ + struct perf_counter *counter; + unsigned long flags; + int neg; + + if (list_empty(&ctx->counter_list)) + return; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * XXX: make counter_list RCU safe + */ + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (perf_swcounter_match(counter, event, regs)) { + neg = atomic64_add_negative(nr, &counter->hw.count); + if (counter->hw.irq_period && !neg) + perf_swcounter_interrupt(counter, nmi, regs); + } + } + + spin_unlock_irqrestore(&ctx->lock, flags); +} + +void perf_swcounter_event(enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) +{ + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + + perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs); + if (cpuctx->task_ctx) + perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs); + + put_cpu_var(perf_cpu_context); +} + +static void perf_swcounter_read(struct perf_counter *counter) +{ + perf_swcounter_update(counter); +} + +static int perf_swcounter_enable(struct perf_counter *counter) +{ + perf_swcounter_set_period(counter); + return 0; +} + +static void perf_swcounter_disable(struct perf_counter *counter) +{ + perf_swcounter_update(counter); +} + +/* + * Software counter: cpu wall time clock + */ + static int cpu_clock_perf_counter_enable(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); @@ -1364,6 +1543,10 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .read = cpu_clock_perf_counter_read, }; +/* + * Software counter: task time clock + */ + /* * Called from within the scheduler: */ @@ -1420,6 +1603,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; +/* + * Software counter: page faults + */ + #ifdef CONFIG_VM_EVENT_COUNTERS #define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT] #else @@ -1473,6 +1660,10 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = { .read = page_faults_perf_counter_read, }; +/* + * Software counter: context switches + */ + static u64 get_context_switches(struct perf_counter *counter) { struct task_struct *curr = counter->ctx->task; @@ -1521,6 +1712,10 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .read = context_switches_perf_counter_read, }; +/* + * Software counter: cpu migrations + */ + static inline u64 get_cpu_migrations(struct perf_counter *counter) { struct task_struct *curr = counter->ctx->task; @@ -1572,7 +1767,9 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { + struct perf_counter_hw_event *hw_event = &counter->hw_event; const struct hw_perf_counter_ops *hw_ops = NULL; + struct hw_perf_counter *hwc = &counter->hw; /* * Software counters (currently) can't in general distinguish @@ -1618,6 +1815,10 @@ sw_perf_counter_init(struct perf_counter *counter) default: break; } + + if (hw_ops) + hwc->irq_period = hw_event->irq_period; + return hw_ops; } -- cgit v1.2.3-70-g09d2 From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:34 +0100 Subject: perf_counter: provide major/minor page fault software events Provide separate sw counters for major and minor page faults. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/powerpc/mm/fault.c | 5 ++++- arch/x86/mm/fault.c | 7 +++++-- include/linux/perf_counter.h | 4 +++- kernel/perf_counter.c | 22 +++++++++------------- 4 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eda5b0ca4af..17bbf6f91fb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -312,6 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -319,8 +320,10 @@ good_area: preempt_enable(); } #endif - } else + } else { current->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } up_read(&mm->mmap_sem); return 0; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c8725752b6c..f2d3324d921 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1140,10 +1140,13 @@ good_area: return; } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + } else { tsk->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } check_v8086_mode(regs, address, tsk); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3fefc3b8150..4b14a8e9dbf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -49,8 +49,10 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, PERF_COUNT_CPU_MIGRATIONS = -5, + PERF_COUNT_PAGE_FAULTS_MIN = -6, + PERF_COUNT_PAGE_FAULTS_MAJ = -7, - PERF_SW_EVENTS_MIN = -6, + PERF_SW_EVENTS_MIN = -8, }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1773c5d7427..68950a3a52b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1503,6 +1503,12 @@ static void perf_swcounter_disable(struct perf_counter *counter) perf_swcounter_update(counter); } +static const struct hw_perf_counter_ops perf_ops_generic = { + .enable = perf_swcounter_enable, + .disable = perf_swcounter_disable, + .read = perf_swcounter_read, +}; + /* * Software counter: cpu wall time clock */ @@ -1603,16 +1609,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; -/* - * Software counter: page faults - */ - -static const struct hw_perf_counter_ops perf_ops_page_faults = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, -}; - /* * Software counter: context switches */ @@ -1753,9 +1749,9 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: - if (!(counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel)) - hw_ops = &perf_ops_page_faults; + case PERF_COUNT_PAGE_FAULTS_MIN: + case PERF_COUNT_PAGE_FAULTS_MAJ: + hw_ops = &perf_ops_generic; break; case PERF_COUNT_CONTEXT_SWITCHES: if (!counter->hw_event.exclude_kernel) -- cgit v1.2.3-70-g09d2 From d6d020e9957745c61285ef3da9f294c5e6801f0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:35 +0100 Subject: perf_counter: hrtimer based sampling for software time events Use hrtimers to profile timer based sampling for the software time counters. This allows platforms without hardware counter support to still perform sample based profiling. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 20 ++++--- kernel/perf_counter.c | 123 ++++++++++++++++++++++++++++++------------- 2 files changed, 100 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4b14a8e9dbf..dfb4c7ce18b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -114,6 +114,7 @@ struct perf_counter_hw_event { #include #include #include +#include #include struct task_struct; @@ -123,12 +124,19 @@ struct task_struct; */ struct hw_perf_counter { #ifdef CONFIG_PERF_COUNTERS - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - atomic64_t count; /* software */ + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; atomic64_t prev_count; u64 irq_period; atomic64_t period_left; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 68950a3a52b..f9330d5827c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1395,7 +1395,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) struct perf_counter *counter, *group_leader = sibling->group_leader; list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - perf_swcounter_update(counter); + counter->hw_ops->read(counter); perf_swcounter_store_irq(sibling, counter->hw_event.type); perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); } @@ -1404,8 +1404,6 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) static void perf_swcounter_interrupt(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - perf_swcounter_save_and_restart(counter); - switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: break; @@ -1426,6 +1424,38 @@ static void perf_swcounter_interrupt(struct perf_counter *counter, wake_up(&counter->waitq); } +static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +{ + struct perf_counter *counter; + struct pt_regs *regs; + + counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); + counter->hw_ops->read(counter); + + regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. + */ + if ((counter->hw_event.exclude_kernel || !regs) && + !counter->hw_event.exclude_user) + regs = task_pt_regs(current); + + if (regs) + perf_swcounter_interrupt(counter, 0, regs); + + hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); + + return HRTIMER_RESTART; +} + +static void perf_swcounter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_swcounter_save_and_restart(counter); + perf_swcounter_interrupt(counter, nmi, regs); +} + static int perf_swcounter_match(struct perf_counter *counter, enum hw_event_types event, struct pt_regs *regs) @@ -1448,13 +1478,20 @@ static int perf_swcounter_match(struct perf_counter *counter, return 1; } +static void perf_swcounter_add(struct perf_counter *counter, u64 nr, + int nmi, struct pt_regs *regs) +{ + int neg = atomic64_add_negative(nr, &counter->hw.count); + if (counter->hw.irq_period && !neg) + perf_swcounter_overflow(counter, nmi, regs); +} + static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum hw_event_types event, u64 nr, int nmi, struct pt_regs *regs) { struct perf_counter *counter; unsigned long flags; - int neg; if (list_empty(&ctx->counter_list)) return; @@ -1465,11 +1502,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, * XXX: make counter_list RCU safe */ list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (perf_swcounter_match(counter, event, regs)) { - neg = atomic64_add_negative(nr, &counter->hw.count); - if (counter->hw.irq_period && !neg) - perf_swcounter_interrupt(counter, nmi, regs); - } + if (perf_swcounter_match(counter, event, regs)) + perf_swcounter_add(counter, nr, nmi, regs); } spin_unlock_irqrestore(&ctx->lock, flags); @@ -1513,14 +1547,6 @@ static const struct hw_perf_counter_ops perf_ops_generic = { * Software counter: cpu wall time clock */ -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) -{ - int cpu = raw_smp_processor_id(); - - atomic64_set(&counter->hw.prev_count, cpu_clock(cpu)); - return 0; -} - static void cpu_clock_perf_counter_update(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); @@ -1533,8 +1559,26 @@ static void cpu_clock_perf_counter_update(struct perf_counter *counter) atomic64_add(now - prev, &counter->count); } +static int cpu_clock_perf_counter_enable(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + int cpu = raw_smp_processor_id(); + + atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + if (hwc->irq_period) { + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(hwc->irq_period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + static void cpu_clock_perf_counter_disable(struct perf_counter *counter) { + hrtimer_cancel(&counter->hw.hrtimer); cpu_clock_perf_counter_update(counter); } @@ -1580,27 +1624,33 @@ static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now atomic64_add(delta, &counter->count); } -static void task_clock_perf_counter_read(struct perf_counter *counter) -{ - u64 now = task_clock_perf_counter_val(counter, 1); - - task_clock_perf_counter_update(counter, now); -} - static int task_clock_perf_counter_enable(struct perf_counter *counter) { - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, - task_clock_perf_counter_val(counter, 0)); + struct hw_perf_counter *hwc = &counter->hw; + + atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0)); + if (hwc->irq_period) { + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(hwc->irq_period), 0, + HRTIMER_MODE_REL, 0); + } return 0; } static void task_clock_perf_counter_disable(struct perf_counter *counter) { - u64 now = task_clock_perf_counter_val(counter, 0); + hrtimer_cancel(&counter->hw.hrtimer); + task_clock_perf_counter_update(counter, + task_clock_perf_counter_val(counter, 0)); +} - task_clock_perf_counter_update(counter, now); +static void task_clock_perf_counter_read(struct perf_counter *counter) +{ + task_clock_perf_counter_update(counter, + task_clock_perf_counter_val(counter, 1)); } static const struct hw_perf_counter_ops perf_ops_task_clock = { @@ -1729,16 +1779,12 @@ sw_perf_counter_init(struct perf_counter *counter) */ switch (counter->hw_event.type) { case PERF_COUNT_CPU_CLOCK: - if (!(counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel || - counter->hw_event.exclude_hv)) - hw_ops = &perf_ops_cpu_clock; + hw_ops = &perf_ops_cpu_clock; + + if (hw_event->irq_period && hw_event->irq_period < 10000) + hw_event->irq_period = 10000; break; case PERF_COUNT_TASK_CLOCK: - if (counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel || - counter->hw_event.exclude_hv) - break; /* * If the user instantiates this as a per-cpu counter, * use the cpu_clock counter instead. @@ -1747,6 +1793,9 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_task_clock; else hw_ops = &perf_ops_cpu_clock; + + if (hw_event->irq_period && hw_event->irq_period < 10000) + hw_event->irq_period = 10000; break; case PERF_COUNT_PAGE_FAULTS: case PERF_COUNT_PAGE_FAULTS_MIN: -- cgit v1.2.3-70-g09d2 From 592903cdcbf606a838056bae6d03fc557806c914 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:36 +0100 Subject: perf_counter: add an event_list I noticed that the counter_list only includes top-level counters, thus perf_swcounter_event() will miss sw-counters in groups. Since perf_swcounter_event() also wants an RCU safe list, create a new event_list that includes all counters and uses RCU list ops and use call_rcu to free the counter structure. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++++ kernel/perf_counter.c | 30 +++++++++++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dfb4c7ce18b..08c11a6afeb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -187,6 +187,7 @@ struct file; struct perf_counter { #ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; + struct list_head event_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; @@ -220,6 +221,8 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; + + struct rcu_head rcu_head; #endif }; @@ -243,6 +246,7 @@ struct perf_counter_context { struct mutex mutex; struct list_head counter_list; + struct list_head event_list; int nr_counters; int nr_active; int is_active; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f9330d5827c..8d6ecfa64c0 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Each CPU has a list of per CPU counters: @@ -72,6 +73,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_tail(&counter->list_entry, &ctx->counter_list); else list_add_tail(&counter->list_entry, &group_leader->sibling_list); + + list_add_rcu(&counter->event_entry, &ctx->event_list); } static void @@ -80,6 +83,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) struct perf_counter *sibling, *tmp; list_del_init(&counter->list_entry); + list_del_rcu(&counter->event_entry); /* * If this was a group counter with sibling counters then @@ -1133,6 +1137,14 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) return ctx; } +static void free_counter_rcu(struct rcu_head *head) +{ + struct perf_counter *counter; + + counter = container_of(head, struct perf_counter, rcu_head); + kfree(counter); +} + /* * Called when the last reference to the file is gone. */ @@ -1151,7 +1163,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); - kfree(counter); + call_rcu(&counter->rcu_head, free_counter_rcu); put_context(ctx); return 0; @@ -1491,22 +1503,16 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, int nmi, struct pt_regs *regs) { struct perf_counter *counter; - unsigned long flags; - if (list_empty(&ctx->counter_list)) + if (list_empty(&ctx->event_list)) return; - spin_lock_irqsave(&ctx->lock, flags); - - /* - * XXX: make counter_list RCU safe - */ - list_for_each_entry(counter, &ctx->counter_list, list_entry) { + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { if (perf_swcounter_match(counter, event, regs)) perf_swcounter_add(counter, nr, nmi, regs); } - - spin_unlock_irqrestore(&ctx->lock, flags); + rcu_read_unlock(); } void perf_swcounter_event(enum hw_event_types event, u64 nr, @@ -1846,6 +1852,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, mutex_init(&counter->mutex); INIT_LIST_HEAD(&counter->list_entry); + INIT_LIST_HEAD(&counter->event_entry); INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); @@ -1992,6 +1999,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); + INIT_LIST_HEAD(&ctx->event_list); ctx->task = task; } -- cgit v1.2.3-70-g09d2 From 01ef09d9ffb5ce9f8d62d1e5206da3d5ca612acc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:11 +0100 Subject: perf_counter: fix uninitialized usage of event_list Impact: fix boot crash When doing the generic context switch event I ran into some early boot hangs, which were caused by inf func recursion (event, fault, event, fault). I eventually tracked it down to event_list not being initialized at the time of the first event. Fix this. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.195392657@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 ++ kernel/perf_counter.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 219748d0026..ca226a91abe 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -124,6 +124,8 @@ extern struct cred init_cred; # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.event_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ .perf_counter_ctx.lock = \ __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b39456ad74a..4c4e9eb37ab 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1506,7 +1506,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, { struct perf_counter *counter; - if (list_empty(&ctx->event_list)) + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:12 +0100 Subject: perf_counter: generic context switch event Impact: cleanup Use the generic software events for context switches. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.283522645@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/perf_counter.c | 60 ++++----------------------------------------------- kernel/sched.c | 6 ------ 3 files changed, 4 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75b2fc5306d..7ed41f7c5ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -138,7 +138,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern u64 cpu_nr_switches(int cpu); extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4c4e9eb37ab..99d5930f0a5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -710,10 +710,13 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct pt_regs *regs; if (likely(!cpuctx->task_ctx)) return; + regs = task_pt_regs(task); + perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; @@ -1667,58 +1670,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; -/* - * Software counter: context switches - */ - -static u64 get_context_switches(struct perf_counter *counter) -{ - struct task_struct *curr = counter->ctx->task; - - if (curr) - return curr->nvcsw + curr->nivcsw; - return cpu_nr_switches(smp_processor_id()); -} - -static void context_switches_perf_counter_update(struct perf_counter *counter) -{ - u64 prev, now; - s64 delta; - - prev = atomic64_read(&counter->hw.prev_count); - now = get_context_switches(counter); - - atomic64_set(&counter->hw.prev_count, now); - - delta = now - prev; - - atomic64_add(delta, &counter->count); -} - -static void context_switches_perf_counter_read(struct perf_counter *counter) -{ - context_switches_perf_counter_update(counter); -} - -static int context_switches_perf_counter_enable(struct perf_counter *counter) -{ - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, - get_context_switches(counter)); - return 0; -} - -static void context_switches_perf_counter_disable(struct perf_counter *counter) -{ - context_switches_perf_counter_update(counter); -} - -static const struct hw_perf_counter_ops perf_ops_context_switches = { - .enable = context_switches_perf_counter_enable, - .disable = context_switches_perf_counter_disable, - .read = context_switches_perf_counter_read, -}; - /* * Software counter: cpu migrations */ @@ -1808,11 +1759,8 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_PAGE_FAULTS: case PERF_COUNT_PAGE_FAULTS_MIN: case PERF_COUNT_PAGE_FAULTS_MAJ: - hw_ops = &perf_ops_generic; - break; case PERF_COUNT_CONTEXT_SWITCHES: - if (!counter->hw_event.exclude_kernel) - hw_ops = &perf_ops_context_switches; + hw_ops = &perf_ops_generic; break; case PERF_COUNT_CPU_MIGRATIONS: if (!counter->hw_event.exclude_kernel) diff --git a/kernel/sched.c b/kernel/sched.c index 39e70860216..f76e3c0188a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2900,14 +2900,8 @@ unsigned long nr_active(void) /* * Externally visible per-cpu scheduler statistics: - * cpu_nr_switches(cpu) - number of context switches on that cpu * cpu_nr_migrations(cpu) - number of migrations into that cpu */ -u64 cpu_nr_switches(int cpu) -{ - return cpu_rq(cpu)->nr_switches; -} - u64 cpu_nr_migrations(int cpu) { return cpu_rq(cpu)->nr_migrations_in; -- cgit v1.2.3-70-g09d2 From e077df4f439681e43f0db8255b2d215b342ebdc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:17 +0100 Subject: perf_counter: hook up the tracepoint events Impact: new perfcounters feature Enable usage of tracepoints as perf counter events. tracepoint event ids can be found in /debug/tracing/event/*/*/id and (for now) are represented as -65536+id in the type field. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.744044174@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ init/Kconfig | 5 +++++ kernel/perf_counter.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 08c11a6afeb..065984c1ff5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,6 +53,8 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS_MAJ = -7, PERF_SW_EVENTS_MIN = -8, + + PERF_TP_EVENTS_MIN = -65536 }; /* @@ -222,6 +224,7 @@ struct perf_counter { struct perf_data *usrdata; struct perf_data data[2]; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif }; diff --git a/init/Kconfig b/init/Kconfig index 38a2ecd47c3..4f647142f2e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -947,6 +947,11 @@ config PERF_COUNTERS Say Y if unsure. +config EVENT_PROFILE + bool "Tracepoint profile sources" + depends on PERF_COUNTERS && EVENT_TRACER + default y + endmenu config VM_EVENT_COUNTERS diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 97f891ffeb4..0bbe3e45ba0 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1152,6 +1152,9 @@ static void free_counter_rcu(struct rcu_head *head) static void free_counter(struct perf_counter *counter) { + if (counter->destroy) + counter->destroy(counter); + call_rcu(&counter->rcu_head, free_counter_rcu); } @@ -1727,6 +1730,45 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { .read = cpu_migrations_perf_counter_read, }; +#ifdef CONFIG_EVENT_PROFILE +void perf_tpcounter_event(int event_id) +{ + perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1, + task_pt_regs(current)); +} + +extern int ftrace_profile_enable(int); +extern void ftrace_profile_disable(int); + +static void tp_perf_counter_destroy(struct perf_counter *counter) +{ + int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + + ftrace_profile_disable(event_id); +} + +static const struct hw_perf_counter_ops * +tp_perf_counter_init(struct perf_counter *counter) +{ + int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + int ret; + + ret = ftrace_profile_enable(event_id); + if (ret) + return NULL; + + counter->destroy = tp_perf_counter_destroy; + + return &perf_ops_generic; +} +#else +static const struct hw_perf_counter_ops * +tp_perf_counter_init(struct perf_counter *counter) +{ + return NULL; +} +#endif + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -1772,6 +1814,7 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_migrations; break; default: + hw_ops = tp_perf_counter_init(counter); break; } -- cgit v1.2.3-70-g09d2 From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:18 +0100 Subject: perf_counter: revamp syscall input ABI Impact: modify ABI The hardware/software classification in hw_event->type became a little strained due to the addition of tracepoint tracing. Instead split up the field and provide a type field to explicitly specify the counter type, while using the event_id field to specify which event to use. Raw counters still work as before, only the raw config now goes into raw_event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.836807573@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 4 +- arch/x86/kernel/cpu/perf_counter.c | 10 ++-- include/linux/perf_counter.h | 95 ++++++++++++++++++++++++-------------- kernel/perf_counter.c | 83 ++++++++++++++++++++------------- 4 files changed, 117 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5008762e8bf..26f69dc7130 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.type; + ev = counter->hw_event.event_id; if (!counter->hw_event.raw) { if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) @@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter) list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.type); + perf_store_irq_data(counter, sub->hw_event.event_config); perf_store_irq_data(counter, atomic64_read(&sub->count)); } } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6cba9d47b71..d844ae41d5a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (hw_event->raw) { - hwc->config |= pmc_ops->raw_event(hw_event->type); + if (hw_event->raw_type) { + hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id); } else { - if (hw_event->type >= pmc_ops->max_events) + if (hw_event->event_id >= pmc_ops->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= pmc_ops->event_map(hw_event->type); + hwc->config |= pmc_ops->event_map(hw_event->event_id); } counter->wakeup_pending = 0; @@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); - perf_store_irq_data(sibling, counter->hw_event.type); + perf_store_irq_data(sibling, counter->hw_event.event_config); perf_store_irq_data(sibling, atomic64_read(&counter->count)); } } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 065984c1ff5..8f939490550 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -21,56 +21,81 @@ */ /* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: + * hw_event.type */ -enum hw_event_types { +enum perf_event_types { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + /* - * Common hardware events, generalized by the kernel: + * available TYPE space, raw is the max value. */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 7, + PERF_TYPE_RAW = 128, +}; +/* + * Generalized performance counter event types, used by the hw_event.event_id + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_ids { /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - PERF_COUNT_PAGE_FAULTS_MIN = -6, - PERF_COUNT_PAGE_FAULTS_MAJ = -7, - - PERF_SW_EVENTS_MIN = -8, + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, +}; - PERF_TP_EVENTS_MIN = -65536 +/* + * Special "software" counters provided by the kernel, even if the hardware + * does not support performance counters. These counters measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum sw_event_ids { + PERF_COUNT_CPU_CLOCK = 0, + PERF_COUNT_TASK_CLOCK = 1, + PERF_COUNT_PAGE_FAULTS = 2, + PERF_COUNT_CONTEXT_SWITCHES = 3, + PERF_COUNT_CPU_MIGRATIONS = 4, + PERF_COUNT_PAGE_FAULTS_MIN = 5, + PERF_COUNT_PAGE_FAULTS_MAJ = 6, + + PERF_SW_EVENTS_MAX = 7, }; /* * IRQ-notification data record type: */ enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - __s64 type; + union { + struct { + __u64 event_id : 56, + type : 8; + }; + struct { + __u64 raw_event_id : 63, + raw_type : 1; + }; + __u64 event_config; + }; __u64 irq_period; __u64 record_type; @@ -78,7 +103,6 @@ struct perf_counter_hw_event { __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -87,7 +111,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 54; + __reserved_1 : 55; __u32 extra_config_len; __u32 __reserved_4; @@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw && counter->hw_event.type < 0; + return !counter->hw_event.raw_type && + counter->hw_event.type != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); #else static inline void @@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, +static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0bbe3e45ba0..68a56a68bc7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter) atomic64_set(&hwc->count, -left); } -static void perf_swcounter_save_and_restart(struct perf_counter *counter) -{ - perf_swcounter_update(counter); - perf_swcounter_set_period(counter); -} - static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) { struct perf_data *irqdata = counter->irqdata; @@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { counter->hw_ops->read(counter); - perf_swcounter_store_irq(sibling, counter->hw_event.type); + perf_swcounter_store_irq(sibling, counter->hw_event.event_config); perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); } } @@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) static void perf_swcounter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - perf_swcounter_save_and_restart(counter); + perf_swcounter_update(counter); + perf_swcounter_set_period(counter); perf_swcounter_interrupt(counter, nmi, regs); } static int perf_swcounter_match(struct perf_counter *counter, - enum hw_event_types event, - struct pt_regs *regs) + enum perf_event_types type, + u32 event, struct pt_regs *regs) { if (counter->state != PERF_COUNTER_STATE_ACTIVE) return 0; - if (counter->hw_event.raw) + if (counter->hw_event.raw_type) + return 0; + + if (counter->hw_event.type != type) return 0; - if (counter->hw_event.type != event) + if (counter->hw_event.event_id != event) return 0; if (counter->hw_event.exclude_user && user_mode(regs)) @@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum hw_event_types event, u64 nr, - int nmi, struct pt_regs *regs) + enum perf_event_types type, u32 event, + u64 nr, int nmi, struct pt_regs *regs) { struct perf_counter *counter; @@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, event, regs)) + if (perf_swcounter_match(counter, type, event, regs)) perf_swcounter_add(counter, nr, nmi, regs); } rcu_read_unlock(); } -void perf_swcounter_event(enum hw_event_types event, u64 nr, - int nmi, struct pt_regs *regs) +static void __perf_swcounter_event(enum perf_event_types type, u32 event, + u64 nr, int nmi, struct pt_regs *regs) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs); - if (cpuctx->task_ctx) - perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs); + perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); + if (cpuctx->task_ctx) { + perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, + nr, nmi, regs); + } put_cpu_var(perf_cpu_context); } +void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) +{ + __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); +} + static void perf_swcounter_read(struct perf_counter *counter) { perf_swcounter_update(counter); @@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { #ifdef CONFIG_EVENT_PROFILE void perf_tpcounter_event(int event_id) { - perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1, - task_pt_regs(current)); + struct pt_regs *regs = get_irq_regs(); + + if (!regs) + regs = task_pt_regs(current); + + __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); } extern int ftrace_profile_enable(int); @@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int); static void tp_perf_counter_destroy(struct perf_counter *counter) { - int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; - - ftrace_profile_disable(event_id); + ftrace_profile_disable(counter->hw_event.event_id); } static const struct hw_perf_counter_ops * tp_perf_counter_init(struct perf_counter *counter) { - int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + int event_id = counter->hw_event.event_id; int ret; ret = ftrace_profile_enable(event_id); @@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter) return NULL; counter->destroy = tp_perf_counter_destroy; + counter->hw.irq_period = counter->hw_event.irq_period; return &perf_ops_generic; } @@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->hw_event.type) { + switch (counter->hw_event.event_id) { case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; @@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter) if (!counter->hw_event.exclude_kernel) hw_ops = &perf_ops_cpu_migrations; break; - default: - hw_ops = tp_perf_counter_init(counter); - break; } if (hw_ops) @@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->state = PERF_COUNTER_STATE_OFF; hw_ops = NULL; - if (!hw_event->raw && hw_event->type < 0) - hw_ops = sw_perf_counter_init(counter); - else + + if (hw_event->raw_type) + hw_ops = hw_perf_counter_init(counter); + else switch (hw_event->type) { + case PERF_TYPE_HARDWARE: hw_ops = hw_perf_counter_init(counter); + break; + + case PERF_TYPE_SOFTWARE: + hw_ops = sw_perf_counter_init(counter); + break; + + case PERF_TYPE_TRACEPOINT: + hw_ops = tp_perf_counter_init(counter); + break; + } if (!hw_ops) { kfree(counter); -- cgit v1.2.3-70-g09d2 From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:19 +0100 Subject: perf_counter: unify irq output code Impact: cleanup Having 3 slightly different copies of the same code around does nobody any good. First step in revamping the output format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.929962222@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 51 +----------------- arch/x86/kernel/cpu/perf_counter.c | 53 +------------------ include/linux/perf_counter.h | 2 + kernel/perf_counter.c | 106 ++++++++++++++++++++----------------- 4 files changed, 61 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 26f69dc7130..88b72eb4af1 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -662,41 +662,6 @@ void perf_counter_do_pending(void) } } -/* - * Record data for an irq counter. - * This function was lifted from the x86 code; maybe it should - * go in the core? - */ -static void perf_store_irq_data(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - -/* - * Record all the values of the counters in a group - */ -static void perf_handle_group(struct perf_counter *counter) -{ - struct perf_counter *leader, *sub; - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.event_config); - perf_store_irq_data(counter, atomic64_read(&sub->count)); - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -736,20 +701,8 @@ static void record_and_restart(struct perf_counter *counter, long val, /* * Finally record data if requested. */ - if (record) { - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - break; - case PERF_RECORD_IRQ: - perf_store_irq_data(counter, instruction_pointer(regs)); - counter->wakeup_pending = 1; - break; - case PERF_RECORD_GROUP: - perf_handle_group(counter); - counter->wakeup_pending = 1; - break; - } - } + if (record) + perf_counter_output(counter, 1, regs); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d844ae41d5a..902282d68b0 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -674,20 +674,6 @@ static void pmc_generic_disable(struct perf_counter *counter) x86_perf_counter_update(counter, hwc, idx); } -static void perf_store_irq_data(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - /* * Save and restart an expired counter. Called by NMI contexts, * so it has to be careful about preempting normal counter ops: @@ -704,22 +690,6 @@ static void perf_save_and_restart(struct perf_counter *counter) __pmc_generic_enable(counter, hwc, idx); } -static void -perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) -{ - struct perf_counter *counter, *group_leader = sibling->group_leader; - - /* - * Store sibling timestamps (if any): - */ - list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); - perf_store_irq_data(sibling, counter->hw_event.event_config); - perf_store_irq_data(sibling, atomic64_read(&counter->count)); - } -} - /* * Maximum interrupt frequency of 100KHz per CPU */ @@ -754,28 +724,7 @@ again: continue; perf_save_and_restart(counter); - - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - continue; - case PERF_RECORD_IRQ: - perf_store_irq_data(counter, instruction_pointer(regs)); - break; - case PERF_RECORD_GROUP: - perf_handle_group(counter, &status, &ack); - break; - } - /* - * From NMI context we cannot call into the scheduler to - * do a task wakeup - but we mark these generic as - * wakeup_pending and initate a wakeup callback: - */ - if (nmi) { - counter->wakeup_pending = 1; - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); - } else { - wake_up(&counter->waitq); - } + perf_counter_output(counter, nmi, regs); } hw_perf_ack_status(ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8f939490550..a4b76c0175f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -317,6 +317,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 68a56a68bc7..f054b8c9bf9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1353,6 +1353,60 @@ static const struct file_operations perf_fops = { .compat_ioctl = perf_ioctl, }; +/* + * Output + */ + +static void perf_counter_store_irq(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +static void perf_counter_handle_group(struct perf_counter *counter) +{ + struct perf_counter *leader, *sub; + + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); + perf_counter_store_irq(counter, sub->hw_event.event_config); + perf_counter_store_irq(counter, atomic64_read(&sub->count)); + } +} + +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + return; + + case PERF_RECORD_IRQ: + perf_counter_store_irq(counter, instruction_pointer(regs)); + break; + + case PERF_RECORD_GROUP: + perf_counter_handle_group(counter); + break; + } + + if (nmi) { + counter->wakeup_pending = 1; + set_perf_counter_pending(); + } else + wake_up(&counter->waitq); +} + /* * Generic software counter infrastructure */ @@ -1395,54 +1449,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter) atomic64_set(&hwc->count, -left); } -static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - -static void perf_swcounter_handle_group(struct perf_counter *sibling) -{ - struct perf_counter *counter, *group_leader = sibling->group_leader; - - list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - counter->hw_ops->read(counter); - perf_swcounter_store_irq(sibling, counter->hw_event.event_config); - perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); - } -} - -static void perf_swcounter_interrupt(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - break; - - case PERF_RECORD_IRQ: - perf_swcounter_store_irq(counter, instruction_pointer(regs)); - break; - - case PERF_RECORD_GROUP: - perf_swcounter_handle_group(counter); - break; - } - - if (nmi) { - counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&counter->waitq); -} - static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) { struct perf_counter *counter; @@ -1461,7 +1467,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) regs = task_pt_regs(current); if (regs) - perf_swcounter_interrupt(counter, 0, regs); + perf_counter_output(counter, 0, regs); hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); @@ -1473,7 +1479,7 @@ static void perf_swcounter_overflow(struct perf_counter *counter, { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - perf_swcounter_interrupt(counter, nmi, regs); + perf_counter_output(counter, nmi, regs); } static int perf_swcounter_match(struct perf_counter *counter, -- cgit v1.2.3-70-g09d2 From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 21 Mar 2009 15:31:47 +1100 Subject: perf_counter: fix type/event_id layout on big-endian systems Impact: build fix for powerpc Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI") expanded the hw_event.type field into a union of structs containing bitfields. In particular it introduced a type field and a raw_type field, with the intention that the 1-bit raw_type field should overlay the most-significant bit of the 8-bit type field, and in fact perf_counter_alloc() now assumes that (or at least, assumes that raw_type doesn't overlay any of the bits that are 1 in the values of PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}). Unfortunately this is not true on big-endian systems such as PowerPC, where bitfields are laid out from left to right, i.e. from most significant bit to least significant. This means that setting hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1. This fixes it by making the layout depend on whether or not __BIG_ENDIAN_BITFIELD is defined. It's a bit ugly, but that's what we get for using bitfields in a user/kernel ABI. Also, that commit didn't fix up some places in arch/powerpc/kernel/ perf_counter.c where hw_event.raw and hw_event.event_id were used. This fixes them too. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 9 +++++---- include/linux/perf_counter.h | 12 ++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 830ca9c4494..6413d9c0313 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,12 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.event_id; - if (!counter->hw_event.raw) { - if (ev >= ppmu->n_generic || - ppmu->generic_events[ev] == 0) + if (!counter->hw_event.raw_type) { + ev = counter->hw_event.event_id; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; + } else { + ev = counter->hw_event.raw_event_id; } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a4b76c0175f..98f5990be1e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -15,6 +15,7 @@ #include #include +#include /* * User-space ABI bits: @@ -86,6 +87,7 @@ enum perf_counter_record_type { */ struct perf_counter_hw_event { union { +#ifndef __BIG_ENDIAN_BITFIELD struct { __u64 event_id : 56, type : 8; @@ -94,6 +96,16 @@ struct perf_counter_hw_event { __u64 raw_event_id : 63, raw_type : 1; }; +#else + struct { + __u64 type : 8, + event_id : 56; + }; + struct { + __u64 raw_type : 1, + raw_event_id : 63; + }; +#endif /* __BIT_ENDIAN_BITFIELD */ __u64 event_config; }; -- cgit v1.2.3-70-g09d2 From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:06 +0100 Subject: perf_counter: remove the event config bitfields Since the bitfields turned into a bit of a mess, remove them and rely on good old masks. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.059499915@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 6 ++-- arch/x86/kernel/cpu/perf_counter.c | 8 ++--- include/linux/perf_counter.h | 74 +++++++++++++++++++++++++------------- kernel/perf_counter.c | 22 +++++++----- 4 files changed, 70 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6413d9c0313..d05651584d4 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,13 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - if (!counter->hw_event.raw_type) { - ev = counter->hw_event.event_id; + if (!perf_event_raw(&counter->hw_event)) { + ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; } else { - ev = counter->hw_event.raw_event_id; + ev = perf_event_config(&counter->hw_event); } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 902282d68b0..3f95b0cdc55 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (hw_event->raw_type) { - hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id); + if (perf_event_raw(hw_event)) { + hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event)); } else { - if (hw_event->event_id >= pmc_ops->max_events) + if (perf_event_id(hw_event) >= pmc_ops->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= pmc_ops->event_map(hw_event->event_id); + hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } counter->wakeup_pending = 0; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 98f5990be1e..56099e52970 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -82,32 +82,37 @@ enum perf_counter_record_type { PERF_RECORD_GROUP = 2, }; +#define __PERF_COUNTER_MASK(name) \ + (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ + PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW_BITS 1 +#define PERF_COUNTER_RAW_SHIFT 63 +#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) + +#define PERF_COUNTER_CONFIG_BITS 63 +#define PERF_COUNTER_CONFIG_SHIFT 0 +#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) + +#define PERF_COUNTER_TYPE_BITS 7 +#define PERF_COUNTER_TYPE_SHIFT 56 +#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) + +#define PERF_COUNTER_EVENT_BITS 56 +#define PERF_COUNTER_EVENT_SHIFT 0 +#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) + /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - union { -#ifndef __BIG_ENDIAN_BITFIELD - struct { - __u64 event_id : 56, - type : 8; - }; - struct { - __u64 raw_event_id : 63, - raw_type : 1; - }; -#else - struct { - __u64 type : 8, - event_id : 56; - }; - struct { - __u64 raw_type : 1, - raw_event_id : 63; - }; -#endif /* __BIT_ENDIAN_BITFIELD */ - __u64 event_config; - }; + /* + * The MSB of the config word signifies if the rest contains cpu + * specific (raw) counter configuration data, if unset, the next + * 7 bits are an event type and the rest of the bits are the event + * identifier. + */ + __u64 config; __u64 irq_period; __u64 record_type; @@ -157,6 +162,27 @@ struct perf_counter_hw_event { struct task_struct; +static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_RAW_MASK; +} + +static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_CONFIG_MASK; +} + +static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) +{ + return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> + PERF_COUNTER_TYPE_SHIFT; +} + +static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_EVENT_MASK; +} + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -336,8 +362,8 @@ extern void perf_counter_output(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw_type && - counter->hw_event.type != PERF_TYPE_HARDWARE; + return !perf_event_raw(&counter->hw_event) && + perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f054b8c9bf9..ca14fc41ccd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1379,7 +1379,7 @@ static void perf_counter_handle_group(struct perf_counter *counter) list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) sub->hw_ops->read(sub); - perf_counter_store_irq(counter, sub->hw_event.event_config); + perf_counter_store_irq(counter, sub->hw_event.config); perf_counter_store_irq(counter, atomic64_read(&sub->count)); } } @@ -1489,13 +1489,13 @@ static int perf_swcounter_match(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return 0; - if (counter->hw_event.raw_type) + if (perf_event_raw(&counter->hw_event)) return 0; - if (counter->hw_event.type != type) + if (perf_event_type(&counter->hw_event) != type) return 0; - if (counter->hw_event.event_id != event) + if (perf_event_id(&counter->hw_event) != event) return 0; if (counter->hw_event.exclude_user && user_mode(regs)) @@ -1757,13 +1757,13 @@ extern void ftrace_profile_disable(int); static void tp_perf_counter_destroy(struct perf_counter *counter) { - ftrace_profile_disable(counter->hw_event.event_id); + ftrace_profile_disable(perf_event_id(&counter->hw_event)); } static const struct hw_perf_counter_ops * tp_perf_counter_init(struct perf_counter *counter) { - int event_id = counter->hw_event.event_id; + int event_id = perf_event_id(&counter->hw_event); int ret; ret = ftrace_profile_enable(event_id); @@ -1797,7 +1797,7 @@ sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->hw_event.event_id) { + switch (perf_event_id(&counter->hw_event)) { case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; @@ -1882,9 +1882,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; - if (hw_event->raw_type) + if (perf_event_raw(hw_event)) { hw_ops = hw_perf_counter_init(counter); - else switch (hw_event->type) { + goto done; + } + + switch (perf_event_type(hw_event)) { case PERF_TYPE_HARDWARE: hw_ops = hw_perf_counter_init(counter); break; @@ -1902,6 +1905,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, kfree(counter); return NULL; } +done: counter->hw_ops = hw_ops; return counter; -- cgit v1.2.3-70-g09d2 From 96f6d4444302bb2ea2cf409529eef816462f6ce0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:07 +0100 Subject: perf_counter: avoid recursion Tracepoint events like lock_acquire and software counters like pagefaults can recurse into the perf counter code again, avoid that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.152096433@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++++++ kernel/perf_counter.c | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 56099e52970..18dc17d0a61 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -328,6 +328,13 @@ struct perf_cpu_context { int active_oncpu; int max_pertask; int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ca14fc41ccd..ce34bff07bd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -1532,10 +1533,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_unlock(); } +static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) +{ + if (in_nmi()) + return &cpuctx->recursion[3]; + + if (in_irq()) + return &cpuctx->recursion[2]; + + if (in_softirq()) + return &cpuctx->recursion[1]; + + return &cpuctx->recursion[0]; +} + static void __perf_swcounter_event(enum perf_event_types type, u32 event, u64 nr, int nmi, struct pt_regs *regs) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int *recursion = perf_swcounter_recursion_context(cpuctx); + + if (*recursion) + goto out; + + (*recursion)++; + barrier(); perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); if (cpuctx->task_ctx) { @@ -1543,6 +1565,10 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event, nr, nmi, regs); } + barrier(); + (*recursion)--; + +out: put_cpu_var(perf_cpu_context); } -- cgit v1.2.3-70-g09d2 From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Mar 2009 18:22:08 +0100 Subject: perf_counter: add an mmap method to allow userspace to read hardware counters Impact: new feature giving performance improvement This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd. This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC. The mmap will only succeed if the counter is a hardware counter monitoring the current process. On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.297057964@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 6 +++ include/linux/perf_counter.h | 15 ++++++++ kernel/perf_counter.c | 76 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d05651584d4..e4349281b07 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); + if (counter->user_page) + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; + if (counter->user_page) + perf_counter_update_userpage(counter); break; } } @@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); + if (counter->user_page) + perf_counter_update_userpage(counter); /* * Finally record data if requested. diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 18dc17d0a61..40b324e91bf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -143,6 +143,17 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_counter_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware counter identifier */ + __s64 offset; /* add to hardware counter value */ +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -278,6 +289,9 @@ struct perf_counter { int oncpu; int cpu; + /* pointer to page shared with userspace via mmap */ + unsigned long user_page; + /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ @@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void); extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_update_userpage(struct perf_counter *counter); extern void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ce34bff07bd..d9cfd902140 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); + free_page(counter->user_page); free_counter(counter); put_context(ctx); @@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } +void perf_counter_update_userpage(struct perf_counter *counter) +{ + struct perf_counter_mmap_page *userpg; + + if (!counter->user_page) + return; + userpg = (struct perf_counter_mmap_page *) counter->user_page; + + ++userpg->lock; + smp_wmb(); + userpg->index = counter->hw.idx; + userpg->offset = atomic64_read(&counter->count); + if (counter->state == PERF_COUNTER_STATE_ACTIVE) + userpg->offset -= atomic64_read(&counter->hw.prev_count); + smp_wmb(); + ++userpg->lock; +} + +static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + if (!counter->user_page) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(counter->user_page); + get_page(vmf->page); + return 0; +} + +static struct vm_operations_struct perf_mmap_vmops = { + .fault = perf_mmap_fault, +}; + +static int perf_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct perf_counter *counter = file->private_data; + unsigned long userpg; + + if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + /* + * For now, restrict to the case of a hardware counter + * on the current task. + */ + if (is_software_counter(counter) || counter->task != current) + return -EINVAL; + + userpg = counter->user_page; + if (!userpg) { + userpg = get_zeroed_page(GFP_KERNEL); + mutex_lock(&counter->mutex); + if (counter->user_page) { + free_page(userpg); + userpg = counter->user_page; + } else { + counter->user_page = userpg; + } + mutex_unlock(&counter->mutex); + if (!userpg) + return -ENOMEM; + } + + perf_counter_update_userpage(counter); + + vma->vm_flags &= ~VM_MAYWRITE; + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + return 0; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, .poll = perf_poll, .unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_ioctl, + .mmap = perf_mmap, }; /* -- cgit v1.2.3-70-g09d2 From 9ab772cd535c4b256a577eae516f9c7462346b2d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab..93054fc3635 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v1.2.3-70-g09d2 From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:10 +0100 Subject: perf_counter: new output ABI - part 1 Impact: Rework the perfcounter output ABI use sys_read() only for instant data and provide mmap() output for all async overflow data. The first mmap() determines the size of the output buffer. The mmap() size must be a PAGE_SIZE multiple of 1+pages, where pages must be a power of 2 or 0. Further mmap()s of the same fd must have the same size. Once all maps are gone, you can again mmap() with a new size. In case of 0 extra pages there is no data output and the first page only contains meta data. When there are data pages, a poll() event will be generated for each full page of data. Furthermore, the output is circular. This means that although 1 page is a valid configuration, its useless, since we'll start overwriting it the instant we report a full page. Future work will focus on the output format (currently maintained) where we'll likey want each entry denoted by a header which includes a type and length. Further future work will allow to splice() the fd, also containing the async overflow data -- splice() would be mutually exclusive with mmap() of the data. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.470536358@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 9 +- include/linux/perf_counter.h | 36 ++- kernel/perf_counter.c | 464 ++++++++++++++++++++----------------- 3 files changed, 263 insertions(+), 246 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index e4349281b07..d48596ab655 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,8 +417,7 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -574,8 +573,7 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); break; } } @@ -702,8 +700,7 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); /* * Finally record data if requested. diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 40b324e91bf..2b5e66d5ebd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -152,6 +152,8 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + + __u32 data_head; /* head in the data section */ }; #ifdef __KERNEL__ @@ -218,21 +220,6 @@ struct hw_perf_counter { #endif }; -/* - * Hardcoded buffer length limit for now, for IRQ-fed events: - */ -#define PERF_DATA_BUFLEN 2048 - -/** - * struct perf_data - performance counter IRQ data sampling ... - */ -struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; -}; - struct perf_counter; /** @@ -256,6 +243,14 @@ enum perf_counter_active_state { struct file; +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; + atomic_t head; + struct perf_counter_mmap_page *user_page; + void *data_pages[0]; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -289,16 +284,15 @@ struct perf_counter { int oncpu; int cpu; - /* pointer to page shared with userspace via mmap */ - unsigned long user_page; + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; - /* read() / irq related data */ + /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ int wakeup_pending; - struct perf_data *irqdata; - struct perf_data *usrdata; - struct perf_data data[2]; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d9cfd902140..0dfe91094fd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -4,7 +4,8 @@ * Copyright(C) 2008 Thomas Gleixner * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar * - * For licencing details see kernel-base/COPYING + * + * For licensing details see kernel-base/COPYING */ #include @@ -1022,66 +1023,6 @@ static u64 perf_counter_read(struct perf_counter *counter) return atomic64_read(&counter->count); } -/* - * Cross CPU call to switch performance data pointers - */ -static void __perf_switch_irq_data(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_data *oldirqdata = counter->irqdata; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - */ - if (ctx->task) { - if (cpuctx->task_ctx != ctx) - return; - spin_lock(&ctx->lock); - } - - /* Change the pointer NMI safe */ - atomic_long_set((atomic_long_t *)&counter->irqdata, - (unsigned long) counter->usrdata); - counter->usrdata = oldirqdata; - - if (ctx->task) - spin_unlock(&ctx->lock); -} - -static struct perf_data *perf_switch_irq_data(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_data *oldirqdata = counter->irqdata; - struct task_struct *task = ctx->task; - - if (!task) { - smp_call_function_single(counter->cpu, - __perf_switch_irq_data, - counter, 1); - return counter->usrdata; - } - -retry: - spin_lock_irq(&ctx->lock); - if (counter->state != PERF_COUNTER_STATE_ACTIVE) { - counter->irqdata = counter->usrdata; - counter->usrdata = oldirqdata; - spin_unlock_irq(&ctx->lock); - return oldirqdata; - } - spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_switch_irq_data, counter); - /* Might have failed, because task was scheduled out */ - if (counter->irqdata == oldirqdata) - goto retry; - - return counter->usrdata; -} - static void put_context(struct perf_counter_context *ctx) { if (ctx->task) @@ -1177,7 +1118,6 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); - free_page(counter->user_page); free_counter(counter); put_context(ctx); @@ -1192,7 +1132,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { u64 cntval; - if (count != sizeof(cntval)) + if (count < sizeof(cntval)) return -EINVAL; /* @@ -1210,122 +1150,21 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); } -static ssize_t -perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count) -{ - if (!usrdata->len) - return 0; - - count = min(count, (size_t)usrdata->len); - if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count)) - return -EFAULT; - - /* Adjust the counters */ - usrdata->len -= count; - if (!usrdata->len) - usrdata->rd_idx = 0; - else - usrdata->rd_idx += count; - - return count; -} - -static ssize_t -perf_read_irq_data(struct perf_counter *counter, - char __user *buf, - size_t count, - int nonblocking) -{ - struct perf_data *irqdata, *usrdata; - DECLARE_WAITQUEUE(wait, current); - ssize_t res, res2; - - irqdata = counter->irqdata; - usrdata = counter->usrdata; - - if (usrdata->len + irqdata->len >= count) - goto read_pending; - - if (nonblocking) - return -EAGAIN; - - spin_lock_irq(&counter->waitq.lock); - __add_wait_queue(&counter->waitq, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (usrdata->len + irqdata->len >= count) - break; - - if (signal_pending(current)) - break; - - if (counter->state == PERF_COUNTER_STATE_ERROR) - break; - - spin_unlock_irq(&counter->waitq.lock); - schedule(); - spin_lock_irq(&counter->waitq.lock); - } - __remove_wait_queue(&counter->waitq, &wait); - __set_current_state(TASK_RUNNING); - spin_unlock_irq(&counter->waitq.lock); - - if (usrdata->len + irqdata->len < count && - counter->state != PERF_COUNTER_STATE_ERROR) - return -ERESTARTSYS; -read_pending: - mutex_lock(&counter->mutex); - - /* Drain pending data first: */ - res = perf_copy_usrdata(usrdata, buf, count); - if (res < 0 || res == count) - goto out; - - /* Switch irq buffer: */ - usrdata = perf_switch_irq_data(counter); - res2 = perf_copy_usrdata(usrdata, buf + res, count - res); - if (res2 < 0) { - if (!res) - res = -EFAULT; - } else { - res += res2; - } -out: - mutex_unlock(&counter->mutex); - - return res; -} - static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_counter *counter = file->private_data; - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return perf_read_hw(counter, buf, count); - - case PERF_RECORD_IRQ: - case PERF_RECORD_GROUP: - return perf_read_irq_data(counter, buf, count, - file->f_flags & O_NONBLOCK); - } - return -EINVAL; + return perf_read_hw(counter, buf, count); } static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_counter *counter = file->private_data; - unsigned int events = 0; - unsigned long flags; + unsigned int events = POLLIN; poll_wait(file, &counter->waitq, wait); - spin_lock_irqsave(&counter->waitq.lock, flags); - if (counter->usrdata->len || counter->irqdata->len) - events |= POLLIN; - spin_unlock_irqrestore(&counter->waitq.lock, flags); - return events; } @@ -1347,78 +1186,207 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } -void perf_counter_update_userpage(struct perf_counter *counter) +static void __perf_counter_update_userpage(struct perf_counter *counter, + struct perf_mmap_data *data) { - struct perf_counter_mmap_page *userpg; - - if (!counter->user_page) - return; - userpg = (struct perf_counter_mmap_page *) counter->user_page; + struct perf_counter_mmap_page *userpg = data->user_page; + /* + * Disable preemption so as to not let the corresponding user-space + * spin too long if we get preempted. + */ + preempt_disable(); ++userpg->lock; smp_wmb(); userpg->index = counter->hw.idx; userpg->offset = atomic64_read(&counter->count); if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); + + userpg->data_head = atomic_read(&data->head); smp_wmb(); ++userpg->lock; + preempt_enable(); +} + +void perf_counter_update_userpage(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) + __perf_counter_update_userpage(counter, data); + rcu_read_unlock(); } static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct perf_counter *counter = vma->vm_file->private_data; + struct perf_mmap_data *data; + int ret = VM_FAULT_SIGBUS; - if (!counter->user_page) - return VM_FAULT_SIGBUS; + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (!data) + goto unlock; + + if (vmf->pgoff == 0) { + vmf->page = virt_to_page(data->user_page); + } else { + int nr = vmf->pgoff - 1; - vmf->page = virt_to_page(counter->user_page); + if ((unsigned)nr > data->nr_pages) + goto unlock; + + vmf->page = virt_to_page(data->data_pages[nr]); + } get_page(vmf->page); + ret = 0; +unlock: + rcu_read_unlock(); + + return ret; +} + +static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) +{ + struct perf_mmap_data *data; + unsigned long size; + int i; + + WARN_ON(atomic_read(&counter->mmap_count)); + + size = sizeof(struct perf_mmap_data); + size += nr_pages * sizeof(void *); + + data = kzalloc(size, GFP_KERNEL); + if (!data) + goto fail; + + data->user_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->user_page) + goto fail_user_page; + + for (i = 0; i < nr_pages; i++) { + data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->data_pages[i]) + goto fail_data_pages; + } + + data->nr_pages = nr_pages; + + rcu_assign_pointer(counter->data, data); + return 0; + +fail_data_pages: + for (i--; i >= 0; i--) + free_page((unsigned long)data->data_pages[i]); + + free_page((unsigned long)data->user_page); + +fail_user_page: + kfree(data); + +fail: + return -ENOMEM; +} + +static void __perf_mmap_data_free(struct rcu_head *rcu_head) +{ + struct perf_mmap_data *data = container_of(rcu_head, + struct perf_mmap_data, rcu_head); + int i; + + free_page((unsigned long)data->user_page); + for (i = 0; i < data->nr_pages; i++) + free_page((unsigned long)data->data_pages[i]); + kfree(data); +} + +static void perf_mmap_data_free(struct perf_counter *counter) +{ + struct perf_mmap_data *data = counter->data; + + WARN_ON(atomic_read(&counter->mmap_count)); + + rcu_assign_pointer(counter->data, NULL); + call_rcu(&data->rcu_head, __perf_mmap_data_free); +} + +static void perf_mmap_open(struct vm_area_struct *vma) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + atomic_inc(&counter->mmap_count); +} + +static void perf_mmap_close(struct vm_area_struct *vma) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + if (atomic_dec_and_mutex_lock(&counter->mmap_count, + &counter->mmap_mutex)) { + perf_mmap_data_free(counter); + mutex_unlock(&counter->mmap_mutex); + } } static struct vm_operations_struct perf_mmap_vmops = { + .open = perf_mmap_open, + .close = perf_mmap_close, .fault = perf_mmap_fault, }; static int perf_mmap(struct file *file, struct vm_area_struct *vma) { struct perf_counter *counter = file->private_data; - unsigned long userpg; + unsigned long vma_size; + unsigned long nr_pages; + unsigned long locked, lock_limit; + int ret = 0; if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) return -EINVAL; - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + + vma_size = vma->vm_end - vma->vm_start; + nr_pages = (vma_size / PAGE_SIZE) - 1; + + if (nr_pages == 0 || !is_power_of_2(nr_pages)) return -EINVAL; - /* - * For now, restrict to the case of a hardware counter - * on the current task. - */ - if (is_software_counter(counter) || counter->task != current) + if (vma_size != PAGE_SIZE * (1 + nr_pages)) return -EINVAL; - userpg = counter->user_page; - if (!userpg) { - userpg = get_zeroed_page(GFP_KERNEL); - mutex_lock(&counter->mutex); - if (counter->user_page) { - free_page(userpg); - userpg = counter->user_page; - } else { - counter->user_page = userpg; - } - mutex_unlock(&counter->mutex); - if (!userpg) - return -ENOMEM; - } + if (vma->vm_pgoff != 0) + return -EINVAL; + + locked = vma_size >> PAGE_SHIFT; + locked += vma->vm_mm->locked_vm; - perf_counter_update_userpage(counter); + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) + return -EPERM; + + mutex_lock(&counter->mmap_mutex); + if (atomic_inc_not_zero(&counter->mmap_count)) + goto out; + + WARN_ON(counter->data); + ret = perf_mmap_data_alloc(counter, nr_pages); + if (!ret) + atomic_set(&counter->mmap_count, 1); +out: + mutex_unlock(&counter->mmap_mutex); vma->vm_flags &= ~VM_MAYWRITE; vma->vm_flags |= VM_RESERVED; vma->vm_ops = &perf_mmap_vmops; - return 0; + + return ret; } static const struct file_operations perf_fops = { @@ -1434,30 +1402,94 @@ static const struct file_operations perf_fops = { * Output */ -static void perf_counter_store_irq(struct perf_counter *counter, u64 data) +static int perf_output_write(struct perf_counter *counter, int nmi, + void *buf, ssize_t size) { - struct perf_data *irqdata = counter->irqdata; + struct perf_mmap_data *data; + unsigned int offset, head, nr; + unsigned int len; + int ret, wakeup; - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; + rcu_read_lock(); + ret = -ENOSPC; + data = rcu_dereference(counter->data); + if (!data) + goto out; + + if (!data->nr_pages) + goto out; + + ret = -EINVAL; + if (size > PAGE_SIZE) + goto out; + + do { + offset = head = atomic_read(&data->head); + head += sizeof(u64); + } while (atomic_cmpxchg(&data->head, offset, head) != offset); + + wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); - *p = data; - irqdata->len += sizeof(u64); + nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1); + offset &= PAGE_SIZE - 1; + + len = min_t(unsigned int, PAGE_SIZE - offset, size); + memcpy(data->data_pages[nr] + offset, buf, len); + size -= len; + + if (size) { + nr = (nr + 1) & (data->nr_pages - 1); + memcpy(data->data_pages[nr], buf + len, size); + } + + /* + * generate a poll() wakeup for every page boundary crossed + */ + if (wakeup) { + __perf_counter_update_userpage(counter, data); + if (nmi) { + counter->wakeup_pending = 1; + set_perf_counter_pending(); + } else + wake_up(&counter->waitq); } + ret = 0; +out: + rcu_read_unlock(); + + return ret; } -static void perf_counter_handle_group(struct perf_counter *counter) +static void perf_output_simple(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + u64 entry; + + entry = instruction_pointer(regs); + + perf_output_write(counter, nmi, &entry, sizeof(entry)); +} + +struct group_entry { + u64 event; + u64 counter; +}; + +static void perf_output_group(struct perf_counter *counter, int nmi) { struct perf_counter *leader, *sub; leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { + struct group_entry entry; + if (sub != counter) sub->hw_ops->read(sub); - perf_counter_store_irq(counter, sub->hw_event.config); - perf_counter_store_irq(counter, atomic64_read(&sub->count)); + + entry.event = sub->hw_event.config; + entry.counter = atomic64_read(&sub->count); + + perf_output_write(counter, nmi, &entry, sizeof(entry)); } } @@ -1469,19 +1501,13 @@ void perf_counter_output(struct perf_counter *counter, return; case PERF_RECORD_IRQ: - perf_counter_store_irq(counter, instruction_pointer(regs)); + perf_output_simple(counter, nmi, regs); break; case PERF_RECORD_GROUP: - perf_counter_handle_group(counter); + perf_output_group(counter, nmi); break; } - - if (nmi) { - counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&counter->waitq); } /* @@ -1967,10 +1993,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); + mutex_init(&counter->mmap_mutex); + INIT_LIST_HEAD(&counter->child_list); - counter->irqdata = &counter->data[0]; - counter->usrdata = &counter->data[1]; counter->cpu = cpu; counter->hw_event = *hw_event; counter->wakeup_pending = 0; -- cgit v1.2.3-70-g09d2 From c7138f37f905bb7987b1f9f5a8ee73667db39f25 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 Mar 2009 13:18:16 +0100 Subject: perf_counter: fix perf_poll() Impact: fix kerneltop 100% CPU usage Only return a poll event when there's actually been one, poll_wait() doesn't actually wait for the waitq you pass it, it only enqueues you on it. Only once all FDs have been iterated and none of thm returned a poll-event will it schedule(). Also make it return POLL_HUP when there's not mmap() area to read from. Further, fix a silly bug in the write code. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arjan van de Ven Orig-LKML-Reference: <1237897096.24918.181.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b5e66d5ebd..48212c15b7d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -246,6 +246,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; + atomic_t wakeup; atomic_t head; struct perf_counter_mmap_page *user_page; void *data_pages[0]; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0dfe91094fd..affe227d56a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1161,7 +1161,16 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_counter *counter = file->private_data; - unsigned int events = POLLIN; + struct perf_mmap_data *data; + unsigned int events; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) + events = atomic_xchg(&data->wakeup, 0); + else + events = POLL_HUP; + rcu_read_unlock(); poll_wait(file, &counter->waitq, wait); @@ -1425,7 +1434,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi, do { offset = head = atomic_read(&data->head); - head += sizeof(u64); + head += size; } while (atomic_cmpxchg(&data->head, offset, head) != offset); wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); @@ -1446,6 +1455,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi, * generate a poll() wakeup for every page boundary crossed */ if (wakeup) { + atomic_xchg(&data->wakeup, POLL_IN); __perf_counter_update_userpage(counter, data); if (nmi) { counter->wakeup_pending = 1; -- cgit v1.2.3-70-g09d2 From 5c1481943250ab65fa5130e05ec479c93216e9f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:23 +0100 Subject: perf_counter: output objects Provide a {type,size} header for each output entry. This should provide extensible output, and the ability to mix multiple streams. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113316.831607932@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 +++++++++ kernel/perf_counter.c | 53 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48212c15b7d..c256635377d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,16 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +struct perf_event_header { + __u32 type; + __u32 size; +}; + +enum perf_event_type { + PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -260,6 +270,7 @@ struct perf_counter { struct list_head list_entry; struct list_head event_entry; struct list_head sibling_list; + int nr_siblings; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0422fd9bf62..d76e3112d38 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) */ if (counter->group_leader == counter) list_add_tail(&counter->list_entry, &ctx->counter_list); - else + else { list_add_tail(&counter->list_entry, &group_leader->sibling_list); + group_leader->nr_siblings++; + } list_add_rcu(&counter->event_entry, &ctx->event_list); } @@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); + if (counter->group_leader != counter) + counter->group_leader->nr_siblings--; + /* * If this was a group counter with sibling counters then * upgrade the siblings to singleton counters by adding them @@ -381,9 +386,11 @@ static int is_software_only_group(struct perf_counter *leader) if (!is_software_counter(leader)) return 0; + list_for_each_entry(counter, &leader->sibling_list, list_entry) if (!is_software_counter(counter)) return 0; + return 1; } @@ -1480,6 +1487,9 @@ static void perf_output_copy(struct perf_output_handle *handle, handle->offset = offset; } +#define perf_output_put(handle, x) \ + perf_output_copy((handle), &(x), sizeof(x)) + static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { @@ -1514,34 +1524,53 @@ out: static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - u64 entry; + struct { + struct perf_event_header header; + u64 ip; + } event; - entry = instruction_pointer(regs); + event.header.type = PERF_EVENT_IP; + event.header.size = sizeof(event); + event.ip = instruction_pointer(regs); - perf_output_write(counter, nmi, &entry, sizeof(entry)); + perf_output_write(counter, nmi, &event, sizeof(event)); } -struct group_entry { - u64 event; - u64 counter; -}; - static void perf_output_group(struct perf_counter *counter, int nmi) { + struct perf_output_handle handle; + struct perf_event_header header; struct perf_counter *leader, *sub; + unsigned int size; + struct { + u64 event; + u64 counter; + } entry; + int ret; + + size = sizeof(header) + counter->nr_siblings * sizeof(entry); + + ret = perf_output_begin(&handle, counter, size); + if (ret) + return; + + header.type = PERF_EVENT_GROUP; + header.size = size; + + perf_output_put(&handle, header); leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { - struct group_entry entry; - if (sub != counter) sub->hw_ops->read(sub); entry.event = sub->hw_event.config; entry.counter = atomic64_read(&sub->count); - perf_output_write(counter, nmi, &entry, sizeof(entry)); + perf_output_put(&handle, entry); } + + perf_output_end(&handle, nmi); } void perf_counter_output(struct perf_counter *counter, -- cgit v1.2.3-70-g09d2 From ea5d20cf99db5d26d43b6d322d3ace17e08a6552 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:25 +0100 Subject: perf_counter: optionally provide the pid/tid of the sampled task Allow cpu wide counters to profile userspace by providing what process the sample belongs to. This raises the first issue with the output type, lots of these options: group, tid, callchain, etc.. are non-exclusive and could be combined, suggesting a bitfield. However, things like the mmap() data stream doesn't fit in that. How to split the type field... Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113317.013775235@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- kernel/perf_counter.c | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c256635377d..7fdbdf8be77 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -127,8 +127,9 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ + include_tid : 1, /* include the tid */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; @@ -164,6 +165,8 @@ struct perf_event_header { enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + + __PERF_EVENT_TID = 0x100, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7669afe82cc..f3e1b27bc1b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1528,16 +1528,30 @@ out: static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { + unsigned int size; struct { struct perf_event_header header; u64 ip; + u32 pid, tid; } event; event.header.type = PERF_EVENT_IP; - event.header.size = sizeof(event); event.ip = instruction_pointer(regs); - perf_output_write(counter, nmi, &event, sizeof(event)); + size = sizeof(event); + + if (counter->hw_event.include_tid) { + /* namespace issues */ + event.pid = current->group_leader->pid; + event.tid = current->pid; + + event.header.type |= __PERF_EVENT_TID; + } else + size -= sizeof(u64); + + event.header.size = size; + + perf_output_write(counter, nmi, &event, size); } static void perf_output_group(struct perf_counter *counter, int nmi) -- cgit v1.2.3-70-g09d2 From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 25 Mar 2009 22:46:58 +1100 Subject: perf_counter: record time running and time enabled for each counter Impact: new functionality Currently, if there are more counters enabled than can fit on the CPU, the kernel will multiplex the counters on to the hardware using round-robin scheduling. That isn't too bad for sampling counters, but for counting counters it means that the value read from a counter represents some unknown fraction of the true count of events that occurred while the counter was enabled. This remedies the situation by keeping track of how long each counter is enabled for, and how long it is actually on the cpu and counting events. These times are recorded in nanoseconds using the task clock for per-task counters and the cpu clock for per-cpu counters. These values can be supplied to userspace on a read from the counter. Userspace requests that they be supplied after the counter value by setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field when creating the counter. (There is no way to change the read format after the counter is created, though it would be possible to add some way to do that.) Using this information it is possible for userspace to scale the count it reads from the counter to get an estimate of the true count: true_count_estimate = count * total_time_enabled / total_time_running This also lets userspace detect the situation where the counter never got to go on the cpu: total_time_running == 0. This functionality has been requested by the PAPI developers, and will be generally needed for interpreting the count values from counting counters correctly. In the implementation, this keeps 5 time values (in nanoseconds) for each counter: total_time_enabled and total_time_running are used when the counter is in state OFF or ERROR and for reporting back to userspace. When the counter is in state INACTIVE or ACTIVE, it is the tstamp_enabled, tstamp_running and tstamp_stopped values that are relevant, and total_time_enabled and total_time_running are determined from them. (tstamp_stopped is only used in INACTIVE state.) The reason for doing it like this is that it means that only counters being enabled or disabled at sched-in and sched-out time need to be updated. There are no new loops that iterate over all counters to update total_time_enabled or total_time_running. This also keeps separate child_total_time_running and child_total_time_enabled fields that get added in when reporting the totals to userspace. They are separate fields so that they can be atomic. We don't want to use atomics for total_time_running, total_time_enabled etc., because then we would have to use atomic sequences to update them, which are slower than regular arithmetic and memory accesses. It is possible to measure total_time_running by adding a task_clock counter to each group of counters, and total_time_enabled can be measured approximately with a top-level task_clock counter (though inaccuracies will creep in if you need to disable and enable groups since it is not possible in general to disable/enable the top-level task_clock counter simultaneously with another group). However, that adds extra overhead - I measured around 15% increase in the context switch latency reported by lat_ctx (from lmbench) when a task_clock counter was added to each of 2 groups, and around 25% increase when a task_clock counter was added to each of 4 groups. (In both cases a top-level task-clock counter was also added.) In contrast, the code added in this commit gives better information with no overhead that I could measure (in fact in some cases I measured lower times with this code, but the differences were all less than one standard deviation). [ v2: address review comments by Andrew Morton. ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Andrew Morton Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 + include/linux/perf_counter.h | 53 +++++++++++++ kernel/perf_counter.c | 157 ++++++++++++++++++++++++++++++++----- 3 files changed, 191 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d48596ab655..df007fe0cc0 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) { counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; + counter->tstamp_running += counter->ctx->time_now - + counter->tstamp_stopped; if (is_software_counter(counter)) counter->hw_ops->enable(counter); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7fdbdf8be77..6bf67ce1762 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -102,6 +102,16 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_SHIFT 0 #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) +/* + * Bits that can be set in hw_event.read_format to request that + * reads on the counter should return the indicated quantities, + * in increasing order of bit value, after the counter value. + */ +enum perf_counter_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1, + PERF_FORMAT_TOTAL_TIME_RUNNING = 2, +}; + /* * Hardware event to monitor via a performance monitoring counter: */ @@ -281,6 +291,32 @@ struct perf_counter { enum perf_counter_active_state prev_state; atomic64_t count; + /* + * These are the total time in nanoseconds that the counter + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task counter) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the counter is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the counter was enabled + * tstamp_running: the notional time when the counter was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * counter was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -291,6 +327,13 @@ struct perf_counter { struct perf_counter *parent; struct list_head child_list; + /* + * These accumulate total time (in nanoseconds) that children + * counters have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + /* * Protect attach/detach and child_list: */ @@ -339,6 +382,16 @@ struct perf_counter_context { int nr_active; int is_active; struct task_struct *task; + + /* + * time_now is the current time in nanoseconds since an arbitrary + * point in the past. For per-task counters, this is based on the + * task clock, and for per-cpu counters it is based on the cpu clock. + * time_lost is an offset from the task/cpu clock, used to make it + * appear that time only passes while the context is scheduled in. + */ + u64 time_now; + u64 time_lost; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 95e02575546..3b862a7988c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -116,6 +116,7 @@ counter_sched_out(struct perf_counter *counter, return; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_stopped = ctx->time_now; counter->hw_ops->disable(counter); counter->oncpu = -1; @@ -251,6 +252,60 @@ retry: spin_unlock_irq(&ctx->lock); } +/* + * Get the current time for this context. + * If this is a task context, we use the task's task clock, + * or for a per-cpu context, we use the cpu clock. + */ +static u64 get_context_time(struct perf_counter_context *ctx, int update) +{ + struct task_struct *curr = ctx->task; + + if (!curr) + return cpu_clock(smp_processor_id()); + + return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime; +} + +/* + * Update the record of the current time in a context. + */ +static void update_context_time(struct perf_counter_context *ctx, int update) +{ + ctx->time_now = get_context_time(ctx, update) - ctx->time_lost; +} + +/* + * Update the total_time_enabled and total_time_running fields for a counter. + */ +static void update_counter_times(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + u64 run_end; + + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + counter->total_time_enabled = ctx->time_now - + counter->tstamp_enabled; + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + run_end = counter->tstamp_stopped; + else + run_end = ctx->time_now; + counter->total_time_running = run_end - counter->tstamp_running; + } +} + +/* + * Update total_time_enabled and total_time_running for all counters in a group. + */ +static void update_group_times(struct perf_counter *leader) +{ + struct perf_counter *counter; + + update_counter_times(leader); + list_for_each_entry(counter, &leader->sibling_list, list_entry) + update_counter_times(counter); +} + /* * Cross CPU call to disable a performance counter */ @@ -276,6 +331,8 @@ static void __perf_counter_disable(void *info) * If it is in error state, leave it in error state. */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + update_context_time(ctx, 1); + update_counter_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); else @@ -320,8 +377,10 @@ static void perf_counter_disable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) + if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_counter_times(counter); counter->state = PERF_COUNTER_STATE_OFF; + } spin_unlock_irq(&ctx->lock); } @@ -366,6 +425,8 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } + counter->tstamp_running += ctx->time_now - counter->tstamp_stopped; + if (!is_software_counter(counter)) cpuctx->active_oncpu++; ctx->nr_active++; @@ -425,6 +486,17 @@ static int group_can_go_on(struct perf_counter *counter, return can_add_hw; } +static void add_counter_to_ctx(struct perf_counter *counter, + struct perf_counter_context *ctx) +{ + list_add_counter(counter, ctx); + ctx->nr_counters++; + counter->prev_state = PERF_COUNTER_STATE_OFF; + counter->tstamp_enabled = ctx->time_now; + counter->tstamp_running = ctx->time_now; + counter->tstamp_stopped = ctx->time_now; +} + /* * Cross CPU call to install and enable a performance counter */ @@ -449,6 +521,7 @@ static void __perf_install_in_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + update_context_time(ctx, 1); /* * Protect the list operation against NMI by disabling the @@ -456,9 +529,7 @@ static void __perf_install_in_context(void *info) */ perf_flags = hw_perf_save_disable(); - list_add_counter(counter, ctx); - ctx->nr_counters++; - counter->prev_state = PERF_COUNTER_STATE_OFF; + add_counter_to_ctx(counter, ctx); /* * Don't put the counter on if it is disabled or if @@ -486,8 +557,10 @@ static void __perf_install_in_context(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) + if (leader->hw_event.pinned) { + update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; + } } if (!err && !ctx->task && cpuctx->max_pertask) @@ -548,10 +621,8 @@ retry: * can add the counter safely, if it the call above did not * succeed. */ - if (list_empty(&counter->list_entry)) { - list_add_counter(counter, ctx); - ctx->nr_counters++; - } + if (list_empty(&counter->list_entry)) + add_counter_to_ctx(counter, ctx); spin_unlock_irq(&ctx->lock); } @@ -576,11 +647,13 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + update_context_time(ctx, 1); counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled; /* * If the counter is in a group and isn't the group leader, @@ -602,8 +675,10 @@ static void __perf_counter_enable(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) + if (leader->hw_event.pinned) { + update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; + } } unlock: @@ -659,8 +734,11 @@ static void perf_counter_enable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_OFF) + if (counter->state == PERF_COUNTER_STATE_OFF) { counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - + counter->total_time_enabled; + } out: spin_unlock_irq(&ctx->lock); } @@ -693,6 +771,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, ctx->is_active = 0; if (likely(!ctx->nr_counters)) goto out; + update_context_time(ctx, 0); flags = hw_perf_save_disable(); if (ctx->nr_active) { @@ -797,6 +876,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, if (likely(!ctx->nr_counters)) goto out; + /* + * Add any time since the last sched_out to the lost time + * so it doesn't get included in the total_time_enabled and + * total_time_running measures for counters in the context. + */ + ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now; + flags = hw_perf_save_disable(); /* @@ -817,8 +903,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, * If this pinned group hasn't been scheduled, * put it in error state. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) + if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_group_times(counter); counter->state = PERF_COUNTER_STATE_ERROR; + } } list_for_each_entry(counter, &ctx->counter_list, list_entry) { @@ -902,8 +990,10 @@ int perf_counter_task_disable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ERROR) + if (counter->state != PERF_COUNTER_STATE_ERROR) { + update_group_times(counter); counter->state = PERF_COUNTER_STATE_OFF; + } } hw_perf_restore(perf_flags); @@ -946,6 +1036,8 @@ int perf_counter_task_enable(void) if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - + counter->total_time_enabled; counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -1009,10 +1101,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) static void __read(void *info) { struct perf_counter *counter = info; + struct perf_counter_context *ctx = counter->ctx; unsigned long flags; curr_rq_lock_irq_save(&flags); + if (ctx->is_active) + update_context_time(ctx, 1); counter->hw_ops->read(counter); + update_counter_times(counter); curr_rq_unlock_irq_restore(&flags); } @@ -1025,6 +1121,8 @@ static u64 perf_counter_read(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, __read, counter, 1); + } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_counter_times(counter); } return atomic64_read(&counter->count); @@ -1137,10 +1235,8 @@ static int perf_release(struct inode *inode, struct file *file) static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 cntval; - - if (count < sizeof(cntval)) - return -EINVAL; + u64 values[3]; + int n; /* * Return end-of-file for a read on a counter that is in @@ -1151,10 +1247,24 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) return 0; mutex_lock(&counter->mutex); - cntval = perf_counter_read(counter); + values[0] = perf_counter_read(counter); + n = 1; + if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); mutex_unlock(&counter->mutex); - return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); + if (count < n * sizeof(u64)) + return -EINVAL; + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; } static ssize_t @@ -2290,8 +2400,7 @@ inherit_counter(struct perf_counter *parent_counter, * Link it up in the child's context: */ child_counter->task = child; - list_add_counter(child_counter, child_ctx); - child_ctx->nr_counters++; + add_counter_to_ctx(child_counter, child_ctx); child_counter->parent = parent_counter; /* @@ -2361,6 +2470,10 @@ static void sync_child_counter(struct perf_counter *child_counter, * Add back the child's count to the parent's count: */ atomic64_add(child_val, &parent_counter->count); + atomic64_add(child_counter->total_time_enabled, + &parent_counter->child_total_time_enabled); + atomic64_add(child_counter->total_time_running, + &parent_counter->child_total_time_running); /* * Remove this counter from the parent's list @@ -2395,6 +2508,7 @@ __perf_counter_exit_task(struct task_struct *child, if (child != current) { wait_task_inactive(child, 0); list_del_init(&child_counter->list_entry); + update_counter_times(child_counter); } else { struct perf_cpu_context *cpuctx; unsigned long flags; @@ -2412,6 +2526,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); group_sched_out(child_counter, cpuctx, child_ctx); + update_counter_times(child_counter); list_del_init(&child_counter->list_entry); -- cgit v1.2.3-70-g09d2 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 4 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/perf_counter.c | 22 +------ arch/x86/include/asm/perf_counter.h | 5 +- arch/x86/include/asm/thread_info.h | 4 +- arch/x86/kernel/cpu/perf_counter.c | 29 -------- arch/x86/kernel/signal.c | 6 -- include/linux/perf_counter.h | 15 +++-- kernel/perf_counter.c | 128 +++++++++++++++++++++++++++++++++--- kernel/timer.c | 3 + 10 files changed, 142 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index cb32d571c9c..20a44d0c9fd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void); #else -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 469e9635ff0..2cd471f92fe 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) { + if (test_perf_counter_pending()) { clear_perf_counter_pending(); perf_counter_do_pending(); } diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df007fe0cc0..cde720fc495 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -649,24 +649,6 @@ hw_perf_counter_init(struct perf_counter *counter) return &power_perf_ops; } -/* - * Handle wakeups. - */ -void perf_counter_do_pending(void) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter && counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; long val; - int need_wakeup = 0, found = 0; + int found = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * immediately; otherwise we'll have do the wakeup when interrupts * get soft-enabled. */ - if (get_perf_counter_pending() && regs->softe) { + if (test_perf_counter_pending() && regs->softe) { irq_enter(); clear_perf_counter_pending(); perf_counter_do_pending(); diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 1662043b340..e2b0e66b235 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,8 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() \ - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); +#define set_perf_counter_pending() do { } while (0) +#define clear_perf_counter_pending() do { } while (0) +#define test_perf_counter_pending() (0) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ffd5d2a367..8820a73ae09 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,7 +106,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -141,7 +139,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3f95b0cdc55..7aab177fb56 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) */ hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } - counter->wakeup_pending = 0; return 0; } @@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } -/* - * This handler is triggered by NMI contexts: - */ -void perf_counter_notify(struct pt_regs *regs) -{ - struct cpu_hw_counters *cpuc; - unsigned long flags; - int bit, cpu; - - local_irq_save(flags); - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - if (!counter) - continue; - - if (counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } - - local_irq_restore(flags); -} - void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 611615a92c9..0a813b17b17 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,7 +6,6 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ -#include #include #include #include @@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } - if (thread_info_flags & _TIF_PERF_COUNTERS) { - clear_thread_flag(TIF_PERF_COUNTERS); - perf_counter_notify(regs); - } - #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bf67ce1762..0d833228eee 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -275,6 +275,10 @@ struct perf_mmap_data { void *data_pages[0]; }; +struct perf_wakeup_entry { + struct perf_wakeup_entry *next; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -350,7 +354,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ - int wakeup_pending; + struct perf_wakeup_entry wakeup; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; @@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); @@ -461,7 +465,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } @@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(u32 event, u64 nr, - int nmi, struct pt_regs *regs) { } +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3b862a7988c..f70ff80e79d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head) kfree(counter); } +static void perf_pending_sync(struct perf_counter *counter); + static void free_counter(struct perf_counter *counter) { + perf_pending_sync(counter); + if (counter->destroy) counter->destroy(counter); @@ -1528,6 +1532,118 @@ static const struct file_operations perf_fops = { .mmap = perf_mmap, }; +/* + * Perf counter wakeup + * + * If there's data, ensure we set the poll() state and publish everything + * to user-space before waking everybody up. + */ + +void perf_counter_wakeup(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) { + (void)atomic_xchg(&data->wakeup, POLL_IN); + __perf_counter_update_userpage(counter, data); + } + rcu_read_unlock(); + + wake_up_all(&counter->waitq); +} + +/* + * Pending wakeups + * + * Handle the case where we need to wakeup up from NMI (or rq->lock) context. + * + * The NMI bit means we cannot possibly take locks. Therefore, maintain a + * single linked list and use cmpxchg() to add entries lockless. + */ + +#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) + +static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { + PENDING_TAIL, +}; + +static void perf_pending_queue(struct perf_counter *counter) +{ + struct perf_wakeup_entry **head; + struct perf_wakeup_entry *prev, *next; + + if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + return; + + head = &get_cpu_var(perf_wakeup_head); + + do { + prev = counter->wakeup.next = *head; + next = &counter->wakeup; + } while (cmpxchg(head, prev, next) != prev); + + set_perf_counter_pending(); + + put_cpu_var(perf_wakeup_head); +} + +static int __perf_pending_run(void) +{ + struct perf_wakeup_entry *list; + int nr = 0; + + list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + while (list != PENDING_TAIL) { + struct perf_counter *counter = container_of(list, + struct perf_counter, wakeup); + + list = list->next; + + counter->wakeup.next = NULL; + /* + * Ensure we observe the unqueue before we issue the wakeup, + * so that we won't be waiting forever. + * -- see perf_not_pending(). + */ + smp_wmb(); + + perf_counter_wakeup(counter); + nr++; + } + + return nr; +} + +static inline int perf_not_pending(struct perf_counter *counter) +{ + /* + * If we flush on whatever cpu we run, there is a chance we don't + * need to wait. + */ + get_cpu(); + __perf_pending_run(); + put_cpu(); + + /* + * Ensure we see the proper queue state before going to sleep + * so that we do not miss the wakeup. -- see perf_pending_handle() + */ + smp_rmb(); + return counter->wakeup.next == NULL; +} + +static void perf_pending_sync(struct perf_counter *counter) +{ + wait_event(counter->waitq, perf_not_pending(counter)); +} + +void perf_counter_do_pending(void) +{ + __perf_pending_run(); +} + /* * Output */ @@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { - (void)atomic_xchg(&handle->data->wakeup, POLL_IN); - __perf_counter_update_userpage(handle->counter, handle->data); - if (nmi) { - handle->counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&handle->counter->waitq); + if (nmi) + perf_pending_queue(handle->counter); + else + perf_counter_wakeup(handle->counter); } rcu_read_unlock(); } @@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; - counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; counter->ctx = ctx; diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e..672ca25fbc4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); + perf_counter_do_pending(); + hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) -- cgit v1.2.3-70-g09d2 From 38ff667b321b00f5e6830e93fb4ab11a653a2920 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:03 +0200 Subject: perf_counter: fix update_userpage() It just occured to me it is possible to have multiple contending updates of the userpage (mmap information vs overflow vs counter). This would break the seqlock logic. It appear the arch code uses this from NMI context, so we cannot possibly serialize its use, therefore separate the data_head update from it and let it return to its original use. The arch code needs to make sure there are no contending callers by disabling the counter before using it -- powerpc appears to do this nicely. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.241410660@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 35 +++++++++++++++++++++++++++++++++++ kernel/perf_counter.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0d833228eee..8ac18852dcf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -160,10 +160,45 @@ struct perf_counter_hw_event { struct perf_counter_mmap_page { __u32 version; /* version number of this structure */ __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw counters in user-space. + * + * The index and offset should be read atomically using the seqlock: + * + * __u32 seq, index; + * __s64 offset; + * + * again: + * rmb(); + * seq = pc->lock; + * + * if (unlikely(seq & 1)) { + * cpu_relax(); + * goto again; + * } + * + * index = pc->index; + * offset = pc->offset; + * + * rmb(); + * if (pc->lock != seq) + * goto again; + * + * After this, index contains architecture specific counter index + 1, + * so that 0 means unavailable, offset contains the value to be added + * to the result of the raw timer read to obtain this counter's value. + */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Control data for the mmap() data buffer. + * + * User-space reading this value should issue an rmb(), on SMP capable + * platforms, after reading this value -- see perf_counter_wakeup(). + */ __u32 data_head; /* head in the data section */ }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f70ff80e79d..c95e92329b9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1316,10 +1316,22 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } -static void __perf_counter_update_userpage(struct perf_counter *counter, - struct perf_mmap_data *data) +/* + * Callers need to ensure there can be no nesting of this function, otherwise + * the seqlock logic goes bad. We can not serialize this because the arch + * code calls this from NMI context. + */ +void perf_counter_update_userpage(struct perf_counter *counter) { - struct perf_counter_mmap_page *userpg = data->user_page; + struct perf_mmap_data *data; + struct perf_counter_mmap_page *userpg; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (!data) + goto unlock; + + userpg = data->user_page; /* * Disable preemption so as to not let the corresponding user-space @@ -1333,20 +1345,10 @@ static void __perf_counter_update_userpage(struct perf_counter *counter, if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); - userpg->data_head = atomic_read(&data->head); smp_wmb(); ++userpg->lock; preempt_enable(); -} - -void perf_counter_update_userpage(struct perf_counter *counter) -{ - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) - __perf_counter_update_userpage(counter, data); +unlock: rcu_read_unlock(); } @@ -1547,7 +1549,13 @@ void perf_counter_wakeup(struct perf_counter *counter) data = rcu_dereference(counter->data); if (data) { (void)atomic_xchg(&data->wakeup, POLL_IN); - __perf_counter_update_userpage(counter, data); + /* + * Ensure all data writes are issued before updating the + * user-space data head information. The matching rmb() + * will be in userspace after reading this value. + */ + smp_wmb(); + data->user_page->data_head = atomic_read(&data->head); } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 0a4a93919bdc5cee48fe4367591e8e0449c1086c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:05 +0200 Subject: perf_counter: executable mmap() information Currently the profiling information returns userspace IPs but no way to correlate them to userspace code. Userspace could look into /proc/$pid/maps but that might not be current or even present anymore at the time of analyzing the IPs. Therefore provide means to track the mmap information and provide it in the output stream. XXX: only covers mmap()/munmap(), mremap() and mprotect() are missing. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Cc: Andrew Morton Orig-LKML-Reference: <20090330171023.417259499@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 ++++++- kernel/perf_counter.c | 145 +++++++++++++++++++++++++++++++++++++++++++ mm/mmap.c | 10 +++ 3 files changed, 177 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8ac18852dcf..037a81145ac 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -137,9 +137,11 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ + include_tid : 1, /* include the tid */ + mmap : 1, /* include mmap data */ + munmap : 1, /* include munmap data */ - __reserved_1 : 54; + __reserved_1 : 52; __u32 extra_config_len; __u32 __reserved_4; @@ -211,6 +213,9 @@ enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + PERF_EVENT_MMAP = 2, + PERF_EVENT_MUNMAP = 3, + __PERF_EVENT_TID = 0x100, }; @@ -491,6 +496,12 @@ static inline int is_software_counter(struct perf_counter *counter) extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + +extern void perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -511,6 +522,15 @@ static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + +static inline void +perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + +static inline void +perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c95e92329b9..f35e89e3d6a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -1843,6 +1844,150 @@ void perf_counter_output(struct perf_counter *counter, } } +/* + * mmap tracking + */ + +struct perf_mmap_event { + struct file *file; + char *file_name; + int file_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 start; + u64 len; + u64 pgoff; + } event; +}; + +static void perf_counter_mmap_output(struct perf_counter *counter, + struct perf_mmap_event *mmap_event) +{ + struct perf_output_handle handle; + int size = mmap_event->event.header.size; + int ret = perf_output_begin(&handle, counter, size); + + if (ret) + return; + + perf_output_put(&handle, mmap_event->event); + perf_output_copy(&handle, mmap_event->file_name, + mmap_event->file_size); + perf_output_end(&handle, 0); +} + +static int perf_counter_mmap_match(struct perf_counter *counter, + struct perf_mmap_event *mmap_event) +{ + if (counter->hw_event.mmap && + mmap_event->event.header.type == PERF_EVENT_MMAP) + return 1; + + if (counter->hw_event.munmap && + mmap_event->event.header.type == PERF_EVENT_MUNMAP) + return 1; + + return 0; +} + +static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, + struct perf_mmap_event *mmap_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_mmap_match(counter, mmap_event)) + perf_counter_mmap_output(counter, mmap_event); + } + rcu_read_unlock(); +} + +static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) +{ + struct perf_cpu_context *cpuctx; + struct file *file = mmap_event->file; + unsigned int size; + char tmp[16]; + char *buf = NULL; + char *name; + + if (file) { + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + name = strncpy(tmp, "//enomem", sizeof(tmp)); + goto got_name; + } + name = dentry_path(file->f_dentry, buf, PATH_MAX); + if (IS_ERR(name)) { + name = strncpy(tmp, "//toolong", sizeof(tmp)); + goto got_name; + } + } else { + name = strncpy(tmp, "//anon", sizeof(tmp)); + goto got_name; + } + +got_name: + size = ALIGN(strlen(name), sizeof(u64)); + + mmap_event->file_name = name; + mmap_event->file_size = size; + + mmap_event->event.header.size = sizeof(mmap_event->event) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); + put_cpu_var(perf_cpu_context); + + perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); + + kfree(buf); +} + +void perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) +{ + struct perf_mmap_event mmap_event = { + .file = file, + .event = { + .header = { .type = PERF_EVENT_MMAP, }, + .pid = current->group_leader->pid, + .tid = current->pid, + .start = addr, + .len = len, + .pgoff = pgoff, + }, + }; + + perf_counter_mmap_event(&mmap_event); +} + +void perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) +{ + struct perf_mmap_event mmap_event = { + .file = file, + .event = { + .header = { .type = PERF_EVENT_MUNMAP, }, + .pid = current->group_leader->pid, + .tid = current->pid, + .start = addr, + .len = len, + .pgoff = pgoff, + }, + }; + + perf_counter_mmap_event(&mmap_event); +} + /* * Generic software counter infrastructure */ diff --git a/mm/mmap.c b/mm/mmap.c index 4a3841186c1..1df63f614f9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1223,6 +1224,9 @@ munmap_back: if (correct_wcount) atomic_inc(&inode->i_writecount); out: + if (vm_flags & VM_EXEC) + perf_counter_mmap(addr, len, pgoff, file); + mm->total_vm += len >> PAGE_SHIFT; vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { @@ -1756,6 +1760,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) do { long nrpages = vma_pages(vma); + if (vma->vm_flags & VM_EXEC) { + perf_counter_munmap(vma->vm_start, + nrpages << PAGE_SHIFT, + vma->vm_pgoff, vma->vm_file); + } + mm->total_vm -= nrpages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); vma = remove_vma(vma); -- cgit v1.2.3-70-g09d2 From 5ed00415e304203a0a9dcaef226d6d3f1106070e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:12 +0200 Subject: perf_counter: re-arrange the perf_event_type Breaks ABI yet again :-) Change the event type so that [0, 2^31-1] are regular event types, but [2^31, 2^32-1] forms a bitmask for overflow events. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.047961770@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 +++-- kernel/perf_counter.c | 56 ++++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 037a81145ac..edf5bfb7ff5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -210,13 +210,15 @@ struct perf_event_header { }; enum perf_event_type { - PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, PERF_EVENT_MMAP = 2, PERF_EVENT_MUNMAP = 3, - __PERF_EVENT_TID = 0x100, + PERF_EVENT_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = 1UL << 30, + __PERF_EVENT_TID = 1UL << 29, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4471e7e2c10..d93e9ddf784 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1754,50 +1754,44 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static int perf_output_write(struct perf_counter *counter, int nmi, - void *buf, ssize_t size) -{ - struct perf_output_handle handle; - int ret; - - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - goto out; - - perf_output_copy(&handle, buf, size); - perf_output_end(&handle); - -out: - return ret; -} - static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - unsigned int size; + int ret; + struct perf_output_handle handle; + struct perf_event_header header; + u64 ip; struct { - struct perf_event_header header; - u64 ip; u32 pid, tid; - } event; + } tid_entry; - event.header.type = PERF_EVENT_IP; - event.ip = instruction_pointer(regs); + header.type = PERF_EVENT_OVERFLOW; + header.size = sizeof(header); - size = sizeof(event); + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); if (counter->hw_event.include_tid) { /* namespace issues */ - event.pid = current->group_leader->pid; - event.tid = current->pid; + tid_entry.pid = current->group_leader->pid; + tid_entry.tid = current->pid; - event.header.type |= __PERF_EVENT_TID; - } else - size -= sizeof(u64); + header.type |= __PERF_EVENT_TID; + header.size += sizeof(tid_entry); + } - event.header.size = size; + ret = perf_output_begin(&handle, counter, header.size, nmi); + if (ret) + return; + + perf_output_put(&handle, header); + perf_output_put(&handle, ip); + + if (counter->hw_event.include_tid) + perf_output_put(&handle, tid_entry); - perf_output_write(counter, nmi, &event, size); + perf_output_end(&handle); } static void perf_output_group(struct perf_counter *counter, int nmi) -- cgit v1.2.3-70-g09d2 From 394ee07623cf556c8daae2b3c00cf5fea47f0811 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:14 +0200 Subject: perf_counter: provide generic callchain bits Provide the generic callchain support bits. If hw_event->callchain is set the arch specific perf_callchain() function is called upon to provide a perf_callchain_entry structure filled with the current callchain. If it does so, it is added to the overflow output event. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.254266860@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 13 ++++++++++++- kernel/perf_counter.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index edf5bfb7ff5..43083afffe0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -140,8 +140,9 @@ struct perf_counter_hw_event { include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + callchain : 1, /* add callchain data */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 extra_config_len; __u32 __reserved_4; @@ -219,6 +220,7 @@ enum perf_event_type { PERF_EVENT_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = 1UL << 30, __PERF_EVENT_TID = 1UL << 29, + __PERF_EVENT_CALLCHAIN = 1UL << 28, }; #ifdef __KERNEL__ @@ -504,6 +506,15 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + u64 nr; + u64 ip[MAX_STACK_DEPTH]; +}; + +extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d93e9ddf784..860cdc26bd7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1653,6 +1653,17 @@ void perf_counter_do_pending(void) __perf_pending_run(); } +/* + * Callchain support -- arch specific + */ + +struct perf_callchain_entry * +__attribute__((weak)) +perf_callchain(struct pt_regs *regs) +{ + return NULL; +} + /* * Output */ @@ -1764,6 +1775,8 @@ static void perf_output_simple(struct perf_counter *counter, struct { u32 pid, tid; } tid_entry; + struct perf_callchain_entry *callchain = NULL; + int callchain_size = 0; header.type = PERF_EVENT_OVERFLOW; header.size = sizeof(header); @@ -1781,6 +1794,17 @@ static void perf_output_simple(struct perf_counter *counter, header.size += sizeof(tid_entry); } + if (counter->hw_event.callchain) { + callchain = perf_callchain(regs); + + if (callchain) { + callchain_size = (1 + callchain->nr) * sizeof(u64); + + header.type |= __PERF_EVENT_CALLCHAIN; + header.size += callchain_size; + } + } + ret = perf_output_begin(&handle, counter, header.size, nmi); if (ret) return; @@ -1791,6 +1815,9 @@ static void perf_output_simple(struct perf_counter *counter, if (counter->hw_event.include_tid) perf_output_put(&handle, tid_entry); + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); + perf_output_end(&handle); } -- cgit v1.2.3-70-g09d2 From 8a057d84912f36e53f970c4d177cb4bb6b2f9e08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:11:59 +0200 Subject: perf_counter: move the event overflow output bits to record_type Per suggestion from Paul, move the event overflow bits to record_type and sanitize the enums a bit. Breaks the ABI -- again ;-) Suggested-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.151921176@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 50 +++++++++++---------- kernel/perf_counter.c | 101 +++++++++++++++++-------------------------- 2 files changed, 68 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 43083afffe0..06a6fba9f53 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,15 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - #define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -102,6 +93,17 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_SHIFT 0 #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) +/* + * Bits that can be set in hw_event.record_type to request information + * in the overflow packets. + */ +enum perf_counter_record_format { + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_GROUP = 1U << 2, + PERF_RECORD_CALLCHAIN = 1U << 3, +}; + /* * Bits that can be set in hw_event.read_format to request that * reads on the counter should return the indicated quantities, @@ -125,8 +127,8 @@ struct perf_counter_hw_event { __u64 config; __u64 irq_period; - __u64 record_type; - __u64 read_format; + __u32 record_type; + __u32 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -137,12 +139,10 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ - callchain : 1, /* add callchain data */ - __reserved_1 : 51; + __reserved_1 : 53; __u32 extra_config_len; __u32 __reserved_4; @@ -212,15 +212,21 @@ struct perf_event_header { enum perf_event_type { - PERF_EVENT_GROUP = 1, - - PERF_EVENT_MMAP = 2, - PERF_EVENT_MUNMAP = 3, + PERF_EVENT_MMAP = 1, + PERF_EVENT_MUNMAP = 2, - PERF_EVENT_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = 1UL << 30, - __PERF_EVENT_TID = 1UL << 29, - __PERF_EVENT_CALLCHAIN = 1UL << 28, + /* + * Half the event type space is reserved for the counter overflow + * bitfields, as found in hw_event.record_type. + * + * These events will have types of the form: + * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + */ + PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = PERF_RECORD_IP, + __PERF_EVENT_TID = PERF_RECORD_TID, + __PERF_EVENT_GROUP = PERF_RECORD_GROUP, + __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 860cdc26bd7..995063df910 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static void perf_output_simple(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; + u64 record_type = counter->hw_event.record_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; struct { u32 pid, tid; } tid_entry; + struct { + u64 event; + u64 counter; + } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; - header.type = PERF_EVENT_OVERFLOW; + header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); - ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; - header.size += sizeof(ip); + if (record_type & PERF_RECORD_IP) { + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); + } - if (counter->hw_event.include_tid) { + if (record_type & PERF_RECORD_TID) { /* namespace issues */ tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; @@ -1794,7 +1801,13 @@ static void perf_output_simple(struct perf_counter *counter, header.size += sizeof(tid_entry); } - if (counter->hw_event.callchain) { + if (record_type & PERF_RECORD_GROUP) { + header.type |= __PERF_EVENT_GROUP; + header.size += sizeof(u64) + + counter->nr_siblings * sizeof(group_entry); + } + + if (record_type & PERF_RECORD_CALLCHAIN) { callchain = perf_callchain(regs); if (callchain) { @@ -1810,69 +1823,35 @@ static void perf_output_simple(struct perf_counter *counter, return; perf_output_put(&handle, header); - perf_output_put(&handle, ip); - if (counter->hw_event.include_tid) - perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_IP) + perf_output_put(&handle, ip); - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); - - perf_output_end(&handle); -} - -static void perf_output_group(struct perf_counter *counter, int nmi) -{ - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_counter *leader, *sub; - unsigned int size; - struct { - u64 event; - u64 counter; - } entry; - int ret; - - size = sizeof(header) + counter->nr_siblings * sizeof(entry); + if (record_type & PERF_RECORD_TID) + perf_output_put(&handle, tid_entry); - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - return; + if (record_type & PERF_RECORD_GROUP) { + struct perf_counter *leader, *sub; + u64 nr = counter->nr_siblings; - header.type = PERF_EVENT_GROUP; - header.size = size; + perf_output_put(&handle, nr); - perf_output_put(&handle, header); + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); + group_entry.event = sub->hw_event.config; + group_entry.counter = atomic64_read(&sub->count); - entry.event = sub->hw_event.config; - entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, entry); + perf_output_put(&handle, group_entry); + } } - perf_output_end(&handle); -} - -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return; - - case PERF_RECORD_IRQ: - perf_output_simple(counter, nmi, regs); - break; + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); - case PERF_RECORD_GROUP: - perf_output_group(counter, nmi); - break; - } + perf_output_end(&handle); } /* -- cgit v1.2.3-70-g09d2 From c457810ab4a825161aec6ef71b581e1bc8febd1a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:01 +0200 Subject: perf_counter: per event wakeups By request, provide a way to request a wakeup every 'n' events instead of every page of output. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.323309784@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- kernel/perf_counter.c | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 06a6fba9f53..5428ba120d7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -145,7 +145,7 @@ struct perf_counter_hw_event { __reserved_1 : 53; __u32 extra_config_len; - __u32 __reserved_4; + __u32 wakeup_events; /* wakeup every n events */ __u64 __reserved_2; __u64 __reserved_3; @@ -321,6 +321,7 @@ struct perf_mmap_data { int nr_pages; atomic_t wakeup; atomic_t head; + atomic_t events; struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 995063df910..9bcab10e735 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1760,7 +1760,15 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle) { - if (handle->wakeup) + int wakeup_events = handle->counter->hw_event.wakeup_events; + + if (wakeup_events) { + int events = atomic_inc_return(&handle->data->events); + if (events >= wakeup_events) { + atomic_sub(wakeup_events, &handle->data->events); + __perf_output_wakeup(handle); + } + } else if (handle->wakeup) __perf_output_wakeup(handle); rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 5872bdb88a35fae7d224bd6b21e5f377e854ccfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:03 +0200 Subject: perf_counter: add more context information Put in counts to tell which ips belong to what context. ----- | | hv | -- nr | | kernel | -- | | user ----- Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.493101305@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 9 +++++++++ include/linux/perf_counter.h | 4 ++-- kernel/perf_counter.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2a946a160ca..c74e20d593a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1088,6 +1088,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { unsigned long bp; char *stack; + int nr = entry->nr; callchain_store(entry, instruction_pointer(regs)); @@ -1099,6 +1100,8 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) #endif dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); + + entry->kernel = entry->nr - nr; } @@ -1128,6 +1131,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) { struct stack_frame frame; const void __user *fp; + int nr = entry->nr; regs = (struct pt_regs *)current->thread.sp0 - 1; fp = (void __user *)regs->bp; @@ -1147,6 +1151,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, frame.return_address); fp = frame.next_fp; } + + entry->user = entry->nr - nr; } static void @@ -1182,6 +1188,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry = &__get_cpu_var(irq_entry); entry->nr = 0; + entry->hv = 0; + entry->kernel = 0; + entry->user = 0; perf_do_callchain(regs, entry); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5428ba120d7..90cce0c74a0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -513,10 +513,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 255 +#define MAX_STACK_DEPTH 254 struct perf_callchain_entry { - u64 nr; + u32 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 9bcab10e735..f105a6e696c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1819,7 +1819,7 @@ void perf_counter_output(struct perf_counter *counter, callchain = perf_callchain(regs); if (callchain) { - callchain_size = (1 + callchain->nr) * sizeof(u64); + callchain_size = (2 + callchain->nr) * sizeof(u64); header.type |= __PERF_EVENT_CALLCHAIN; header.size += callchain_size; -- cgit v1.2.3-70-g09d2 From 92f22a3865abe87eea2609a6f8e5be5123f7ce4f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:04 +0200 Subject: perf_counter: update mmap() counter read Paul noted that we don't need SMP barriers for the mmap() counter read because its always on the same cpu (otherwise you can't access the hw counter anyway). So remove the SMP barriers and replace them with regular compiler barriers. Further, update the comment to include a race free method of reading said hardware counter. The primary change is putting the pmc_read inside the seq-loop, otherwise we can still race and read rubbish. Noticed-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.577951445@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 22 ++++++++++------------ kernel/perf_counter.c | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 90cce0c74a0..f2b914de3f0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -167,30 +167,28 @@ struct perf_counter_mmap_page { /* * Bits needed to read the hw counters in user-space. * - * The index and offset should be read atomically using the seqlock: - * - * __u32 seq, index; - * __s64 offset; + * u32 seq; + * s64 count; * * again: - * rmb(); * seq = pc->lock; - * * if (unlikely(seq & 1)) { * cpu_relax(); * goto again; * } * - * index = pc->index; - * offset = pc->offset; + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * rmb(); + * barrier(); * if (pc->lock != seq) * goto again; * - * After this, index contains architecture specific counter index + 1, - * so that 0 means unavailable, offset contains the value to be added - * to the result of the raw timer read to obtain this counter's value. + * NOTE: for obvious reason this only works on self-monitoring + * processes. */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f105a6e696c..2a5d4f52556 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter) */ preempt_disable(); ++userpg->lock; - smp_wmb(); + barrier(); userpg->index = counter->hw.idx; userpg->offset = atomic64_read(&counter->count); if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); - smp_wmb(); + barrier(); ++userpg->lock; preempt_enable(); unlock: -- cgit v1.2.3-70-g09d2 From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:49 -0700 Subject: futex: add requeue_pi functionality PI Futexes and their underlying rt_mutex cannot be left ownerless if there are pending waiters as this will break the PI boosting logic, so the standard requeue commands aren't sufficient. The new commands properly manage pi futex ownership by ensuring a futex with waiters has an owner at all times. This will allow glibc to properly handle pi mutexes with pthread_condvars. The approach taken here is to create two new futex op codes: FUTEX_WAIT_REQUEUE_PI: Tasks will use this op code to wait on a futex (such as a non-pi waitqueue) and wake after they have been requeued to a pi futex. Prior to returning to userspace, they will acquire this pi futex (and the underlying rt_mutex). futex_wait_requeue_pi() is the result of a high speed collision between futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being done by futex_proxy_trylock_atomic() on behalf of the top_waiter). FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI): This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI, regardless of how many tasks the caller intends to wake or requeue. pthread_cond_broadcast() should call this with nr_wake=1 and nr_requeue=INT_MAX. pthread_cond_signal() should call this with nr_wake=1 and nr_requeue=0. The reason being we need both callers to get the benefit of the futex_proxy_trylock_atomic() routine. futex_requeue() also enqueues the top_waiter on the rt_mutex via rt_mutex_start_proxy_lock(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 8 + include/linux/thread_info.h | 3 +- kernel/futex.c | 519 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 510 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f..b05519ca9e5 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,9 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_REQUEUE_PI 12 +#define FUTEX_CMP_REQUEUE_PI 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +41,11 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56..a8cc4e13434 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { diff --git a/kernel/futex.c b/kernel/futex.c index dbe857aa438..185c981d89e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -19,6 +19,10 @@ * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet * + * Requeue-PI support by Darren Hart + * Copyright (C) IBM Corporation, 2009 + * Thanks to Thomas Gleixner for conceptual design and careful reviews. + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -109,6 +113,9 @@ struct futex_q { struct futex_pi_state *pi_state; struct task_struct *task; + /* rt_waiter storage for requeue_pi: */ + struct rt_mutex_waiter *rt_waiter; + /* Bitset for the optional bitmasked wakeup */ u32 bitset; }; @@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key)) { - if (this->pi_state) { + if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } @@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, q->key = *key2; } -/* - * Requeue all waiters hashed on one physical page to another - * physical page. +/** + * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue + * q: the futex_q + * key: the key of the requeue target futex + * + * During futex_requeue, with requeue_pi=1, it is possible to acquire the + * target futex if it is uncontended or via a lock steal. Set the futex_q key + * to the requeue target futex so the waiter can detect the wakeup on the right + * futex, but remove it from the hb and NULL the rt_waiter so it can detect + * atomic lock acquisition. Must be called with the q->lock_ptr held. + */ +static inline +void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) +{ + drop_futex_key_refs(&q->key); + get_futex_key_refs(key); + q->key = *key; + + WARN_ON(plist_node_empty(&q->list)); + plist_del(&q->list, &q->list.plist); + + WARN_ON(!q->rt_waiter); + q->rt_waiter = NULL; + + wake_up(&q->waiter); +} + +/** + * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter + * @pifutex: the user address of the to futex + * @hb1: the from futex hash bucket, must be locked by the caller + * @hb2: the to futex hash bucket, must be locked by the caller + * @key1: the from futex key + * @key2: the to futex key + * + * Try and get the lock on behalf of the top waiter if we can do it atomically. + * Wake the top waiter if we succeed. hb1 and hb2 must be held by the caller. + * + * Returns: + * 0 - failed to acquire the lock atomicly + * 1 - acquired the lock + * <0 - error + */ +static int futex_proxy_trylock_atomic(u32 __user *pifutex, + struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, + union futex_key *key1, union futex_key *key2, + struct futex_pi_state **ps) +{ + struct futex_q *top_waiter; + u32 curval; + int ret; + + if (get_futex_value_locked(&curval, pifutex)) + return -EFAULT; + + top_waiter = futex_top_waiter(hb1, key1); + + /* There are no waiters, nothing for us to do. */ + if (!top_waiter) + return 0; + + /* + * Either take the lock for top_waiter or set the FUTEX_WAITERS bit. + * The pi_state is returned in ps in contended cases. + */ + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task); + if (ret == 1) + requeue_pi_wake_futex(top_waiter, key2); + + return ret; +} + +/** + * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 + * uaddr1: source futex user address + * uaddr2: target futex user address + * nr_wake: number of waiters to wake (must be 1 for requeue_pi) + * nr_requeue: number of waiters to requeue (0-INT_MAX) + * requeue_pi: if we are attempting to requeue from a non-pi futex to a + * pi futex (pi to pi requeue is not supported) + * + * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire + * uaddr2 atomically on behalf of the top waiter. + * + * Returns: + * >=0 - on success, the number of tasks requeued or woken + * <0 - on error */ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) + int nr_wake, int nr_requeue, u32 *cmpval, + int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + int drop_count = 0, task_count = 0, ret; + struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - int ret, drop_count = 0; + u32 curval2; + + if (requeue_pi) { + /* + * requeue_pi requires a pi_state, try to allocate it now + * without any locks in case it fails. + */ + if (refill_pi_state_cache()) + return -ENOMEM; + /* + * requeue_pi must wake as many tasks as it can, up to nr_wake + * + nr_requeue, since it acquires the rt_mutex prior to + * returning to userspace, so as to not leave the rt_mutex with + * waiters and no owner. However, second and third wake-ups + * cannot be predicted as they involve race conditions with the + * first wake and a fault while looking up the pi_state. Both + * pthread_cond_signal() and pthread_cond_broadcast() should + * use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; + } retry: + if (pi_state != NULL) { + /* + * We will have to lookup the pi_state again, so free this one + * to keep the accounting correct. + */ + free_pi_state(pi_state); + pi_state = NULL; + } + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -1020,19 +1145,94 @@ retry_private: } } + if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + /* Attempt to acquire uaddr2 and wake the top_waiter. */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, + &key2, &pi_state); + + /* + * At this point the top_waiter has either taken uaddr2 or is + * waiting on it. If the former, then the pi_state will not + * exist yet, look it up one more time to ensure we have a + * reference to it. + */ + if (ret == 1) { + WARN_ON(pi_state); + task_count++; + ret = get_futex_value_locked(&curval2, uaddr2); + if (!ret) + ret = lookup_pi_state(curval2, hb2, &key2, + &pi_state); + } + + switch (ret) { + case 0: + break; + case -EFAULT: + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + ret = get_user(curval2, uaddr2); + if (!ret) + goto retry; + goto out; + case -EAGAIN: + /* The owner was exiting, try again. */ + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + cond_resched(); + goto retry; + default: + goto out_unlock; + } + } + head1 = &hb1->chain; plist_for_each_entry_safe(this, next, head1, list) { - if (!match_futex (&this->key, &key1)) + if (task_count - nr_wake >= nr_requeue) + break; + + if (!match_futex(&this->key, &key1)) continue; - if (++ret <= nr_wake) { + + WARN_ON(!requeue_pi && this->rt_waiter); + WARN_ON(requeue_pi && !this->rt_waiter); + + /* + * Wake nr_wake waiters. For requeue_pi, if we acquired the + * lock, we already woke the top_waiter. If not, it will be + * woken by futex_unlock_pi(). + */ + if (++task_count <= nr_wake && !requeue_pi) { wake_futex(this); - } else { - requeue_futex(this, hb1, hb2, &key2); - drop_count++; + continue; + } - if (ret - nr_wake >= nr_requeue) - break; + /* + * Requeue nr_requeue waiters and possibly one more in the case + * of requeue_pi if we couldn't acquire the lock atomically. + */ + if (requeue_pi) { + /* Prepare the waiter to take the rt_mutex. */ + atomic_inc(&pi_state->refcount); + this->pi_state = pi_state; + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task, 1); + if (ret == 1) { + /* We got the lock. */ + requeue_pi_wake_futex(this, &key2); + continue; + } else if (ret) { + /* -EDEADLK */ + this->pi_state = NULL; + free_pi_state(pi_state); + goto out_unlock; + } } + requeue_futex(this, hb1, hb2, &key2); + drop_count++; } out_unlock: @@ -1047,7 +1247,9 @@ out_put_keys: out_put_key1: put_futex_key(fshared, &key1); out: - return ret; + if (pi_state != NULL) + free_pi_state(pi_state); + return ret ? ret : task_count; } /* The key must be already stored in q->key. */ @@ -1270,6 +1472,7 @@ handle_fault: #define FLAGS_HAS_TIMEOUT 0x04 static long futex_wait_restart(struct restart_block *restart); +static long futex_lock_pi_restart(struct restart_block *restart); /** * fixup_owner() - Post lock pi_state and corner case management @@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.bitset = bitset; + q.rt_waiter = NULL; if (abs_time) { to = &timeout; @@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } q.pi_state = NULL; + q.rt_waiter = NULL; retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); @@ -1701,6 +1906,20 @@ uaddr_faulted: goto retry; } +static long futex_lock_pi_restart(struct restart_block *restart) +{ + u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; + ktime_t t, *tp = NULL; + int fshared = restart->futex.flags & FLAGS_SHARED; + + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { + t.tv64 = restart->futex.time; + tp = &t; + } + restart->fn = do_no_restart_syscall; + + return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0); +} /* * Userspace attempted a TID -> 0 atomic transition, and failed. @@ -1803,6 +2022,253 @@ pi_faulted: return ret; } +/** + * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex + * @hb: the hash_bucket futex_q was original enqueued on + * @q: the futex_q woken while waiting to be requeued + * @key2: the futex_key of the requeue target futex + * @timeout: the timeout associated with the wait (NULL if none) + * + * Detect if the task was woken on the initial futex as opposed to the requeue + * target futex. If so, determine if it was a timeout or a signal that caused + * the wakeup and return the appropriate error code to the caller. Must be + * called with the hb lock held. + * + * Returns + * 0 - no early wakeup detected + * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?) + */ +static inline +int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + struct futex_q *q, union futex_key *key2, + struct hrtimer_sleeper *timeout) +{ + int ret = 0; + + /* + * With the hb lock held, we avoid races while we process the wakeup. + * We only need to hold hb (and not hb2) to ensure atomicity as the + * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. + * It can't be requeued from uaddr2 to something else since we don't + * support a PI aware source futex for requeue. + */ + if (!match_futex(&q->key, key2)) { + WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); + /* + * We were woken prior to requeue by a timeout or a signal. + * Unqueue the futex_q and determine which it was. + */ + plist_del(&q->list, &q->list.plist); + drop_futex_key_refs(&q->key); + + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + else { + /* + * We expect signal_pending(current), but another + * thread may have handled it for us already. + */ + /* FIXME: ERESTARTSYS or ERESTARTNOINTR? Do we care if + * the user specified SA_RESTART or not? */ + ret = -ERESTARTSYS; + } + } + return ret; +} + +/** + * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 + * @uaddr: the futex we initialyl wait on (non-pi) + * @fshared: whether the futexes are shared (1) or not (0). They must be + * the same type, no requeueing from private to shared, etc. + * @val: the expected value of uaddr + * @abs_time: absolute timeout + * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. + * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) + * @uaddr2: the pi futex we will take prior to returning to user-space + * + * The caller will wait on uaddr and will be requeued by futex_requeue() to + * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and + * complete the acquisition of the rt_mutex prior to returning to userspace. + * This ensures the rt_mutex maintains an owner when it has waiters; without + * one, the pi logic wouldn't know which task to boost/deboost, if there was a + * need to. + * + * We call schedule in futex_wait_queue_me() when we enqueue and return there + * via the following: + * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() + * 2) wakeup on uaddr2 after a requeue and subsequent unlock + * 3) signal (before or after requeue) + * 4) timeout (before or after requeue) + * + * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. + * + * If 2, we may then block on trying to take the rt_mutex and return via: + * 5) successful lock + * 6) signal + * 7) timeout + * 8) other lock acquisition failure + * + * If 6, we setup a restart_block with futex_lock_pi() as the function. + * + * If 4 or 7, we cleanup and return with -ETIMEDOUT. + * + * Returns: + * 0 - On success + * <0 - On error + */ +static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, + int clockrt, u32 __user *uaddr2) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct rt_mutex_waiter rt_waiter; + struct rt_mutex *pi_mutex = NULL; + DECLARE_WAITQUEUE(wait, current); + struct restart_block *restart; + struct futex_hash_bucket *hb; + union futex_key key2; + struct futex_q q; + int res, ret; + u32 uval; + + if (!bitset) + return -EINVAL; + + if (abs_time) { + to = &timeout; + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + + /* + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ + debug_rt_mutex_init_waiter(&rt_waiter); + rt_waiter.task = NULL; + + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = &rt_waiter; + + key2 = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr2, fshared, &key2); + if (unlikely(ret != 0)) + goto out; + + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) { + put_futex_key(fshared, &key2); + goto out; + } + + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ + futex_wait_queue_me(hb, &q, to, &wait); + + spin_lock(&hb->lock); + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); + if (ret) + goto out_put_keys; + + /* + * In order for us to be here, we know our q.key == key2, and since + * we took the hb->lock above, we also know that futex_requeue() has + * completed and we no longer have to concern ourselves with a wakeup + * race with the atomic proxy lock acquition by the requeue code. + */ + + /* Check if the requeue code acquired the second futex for us. */ + if (!q.rt_waiter) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case. + */ + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current, + fshared); + spin_unlock(q.lock_ptr); + } + } else { + /* + * We have been woken up by futex_unlock_pi(), a timeout, or a + * signal. futex_unlock_pi() will not destroy the lock_ptr nor + * the pi_state. + */ + WARN_ON(!&q.pi_state); + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + debug_rt_mutex_free_waiter(&rt_waiter); + + spin_lock(q.lock_ptr); + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr2, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it + * acquired the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; + + /* Unqueue and drop the lock. */ + unqueue_me_pi(&q); + } + + /* + * If fixup_pi_state_owner() faulted and was unable to handle the + * fault, unlock the rt_mutex and return the fault to userspace. + */ + if (ret == -EFAULT) { + if (rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + } else if (ret == -EINTR) { + ret = -EFAULT; + if (get_user(uval, uaddr2)) + goto out_put_keys; + + /* + * We've already been requeued, so restart by calling + * futex_lock_pi() directly, rather then returning to this + * function. + */ + ret = -ERESTART_RESTARTBLOCK; + restart = ¤t_thread_info()->restart_block; + restart->fn = futex_lock_pi_restart; + restart->futex.uaddr = (u32 *)uaddr2; + restart->futex.val = uval; + restart->futex.flags = 0; + if (abs_time) { + restart->futex.flags |= FLAGS_HAS_TIMEOUT; + restart->futex.time = abs_time->tv64; + } + + if (fshared) + restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; + } + +out_put_keys: + put_futex_key(fshared, &q.key); + put_futex_key(fshared, &key2); + +out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + return ret; +} + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, fshared = 1; clockrt = op & FUTEX_CLOCK_REALTIME; - if (clockrt && cmd != FUTEX_WAIT_BITSET) + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; switch (cmd) { @@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake(uaddr, fshared, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); break; case FUTEX_WAKE_OP: ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); @@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (futex_cmpxchg_enabled) ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; + case FUTEX_WAIT_REQUEUE_PI: + val3 = FUTEX_BITSET_MATCH_ANY; + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); + break; + case FUTEX_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); + break; + case FUTEX_CMP_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); + break; default: ret = -ENOSYS; } @@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET)) { + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) @@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } /* - * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. + * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; -- cgit v1.2.3-70-g09d2 From a2e87d06ddbe6e6fdb8d6d2e5e985efe4efb07dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:44:59 +0200 Subject: perf_counter: update mmap() counter read, take 2 Update the userspace read method. Paul noted that: - userspace cannot observe ->lock & 1 on the same cpu. - we need a barrier() between reading ->lock and ->index to ensure we read them in that prticular order. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.368446033@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f2b914de3f0..e22ab47a2f4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -170,22 +170,18 @@ struct perf_counter_mmap_page { * u32 seq; * s64 count; * - * again: - * seq = pc->lock; - * if (unlikely(seq & 1)) { - * cpu_relax(); - * goto again; - * } + * do { + * seq = pc->lock; * - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * barrier(); - * if (pc->lock != seq) - * goto again; + * barrier(); + * } while (pc->lock != seq); * * NOTE: for obvious reason this only works on self-monitoring * processes. -- cgit v1.2.3-70-g09d2 From 9c03d88e328d5f28f13191622c2ea1349c36b799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:00 +0200 Subject: perf_counter: add more context information Change the callchain context entries to u16, so as to gain some space. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.457320003@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- kernel/perf_counter.c | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e22ab47a2f4..f9d5cf0bfbd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -507,10 +507,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 254 +#define MAX_STACK_DEPTH 255 struct perf_callchain_entry { - u32 nr, hv, kernel, user; + u16 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2a5d4f52556..727624db507 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1657,9 +1657,7 @@ void perf_counter_do_pending(void) * Callchain support -- arch specific */ -struct perf_callchain_entry * -__attribute__((weak)) -perf_callchain(struct pt_regs *regs) +__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) { return NULL; } @@ -1819,7 +1817,7 @@ void perf_counter_output(struct perf_counter *counter, callchain = perf_callchain(regs); if (callchain) { - callchain_size = (2 + callchain->nr) * sizeof(u64); + callchain_size = (1 + callchain->nr) * sizeof(u64); header.type |= __PERF_EVENT_CALLCHAIN; header.size += callchain_size; -- cgit v1.2.3-70-g09d2 From 3c446b3d3b38f991f97e9d2df0ad26a60a94dcff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:01 +0200 Subject: perf_counter: SIGIO support Provide support for fcntl() I/O availability signals. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.579788800@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f9d5cf0bfbd..8d5d11b8d01 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -238,6 +238,7 @@ enum perf_event_type { #include #include #include +#include #include struct task_struct; @@ -398,6 +399,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; + struct fasync_struct *fasync; /* optional: for NMIs */ struct perf_wakeup_entry wakeup; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 727624db507..c58cc64319e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1526,6 +1526,22 @@ out: return ret; } +static int perf_fasync(int fd, struct file *filp, int on) +{ + struct perf_counter *counter = filp->private_data; + struct inode *inode = filp->f_path.dentry->d_inode; + int retval; + + mutex_lock(&inode->i_mutex); + retval = fasync_helper(fd, filp, on, &counter->fasync); + mutex_unlock(&inode->i_mutex); + + if (retval < 0) + return retval; + + return 0; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, @@ -1533,6 +1549,7 @@ static const struct file_operations perf_fops = { .unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_ioctl, .mmap = perf_mmap, + .fasync = perf_fasync, }; /* @@ -1549,7 +1566,7 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_lock(); data = rcu_dereference(counter->data); if (data) { - (void)atomic_xchg(&data->wakeup, POLL_IN); + atomic_set(&data->wakeup, POLL_IN); /* * Ensure all data writes are issued before updating the * user-space data head information. The matching rmb() @@ -1561,6 +1578,7 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_unlock(); wake_up_all(&counter->waitq); + kill_fasync(&counter->fasync, SIGIO, POLL_IN); } /* -- cgit v1.2.3-70-g09d2 From 671dec5daf3b3c43c5777be282f00120a44cf37f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:02 +0200 Subject: perf_counter: generalize pending infrastructure Prepare the pending infrastructure to do more than wakeups. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.634732847@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++--- kernel/perf_counter.c | 53 ++++++++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8d5d11b8d01..977fb15a53f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -321,8 +321,9 @@ struct perf_mmap_data { void *data_pages[0]; }; -struct perf_wakeup_entry { - struct perf_wakeup_entry *next; +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); }; /** @@ -401,7 +402,7 @@ struct perf_counter { wait_queue_head_t waitq; struct fasync_struct *fasync; /* optional: for NMIs */ - struct perf_wakeup_entry wakeup; + struct perf_pending_entry pending; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c58cc64319e..0a2ade2e4f1 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1581,6 +1581,14 @@ void perf_counter_wakeup(struct perf_counter *counter) kill_fasync(&counter->fasync, SIGIO, POLL_IN); } +static void perf_pending_wakeup(struct perf_pending_entry *entry) +{ + struct perf_counter *counter = container_of(entry, + struct perf_counter, pending); + + perf_counter_wakeup(counter); +} + /* * Pending wakeups * @@ -1590,45 +1598,47 @@ void perf_counter_wakeup(struct perf_counter *counter) * single linked list and use cmpxchg() to add entries lockless. */ -#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) +#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) -static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { +static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { PENDING_TAIL, }; -static void perf_pending_queue(struct perf_counter *counter) +static void perf_pending_queue(struct perf_pending_entry *entry, + void (*func)(struct perf_pending_entry *)) { - struct perf_wakeup_entry **head; - struct perf_wakeup_entry *prev, *next; + struct perf_pending_entry **head; - if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) return; - head = &get_cpu_var(perf_wakeup_head); + entry->func = func; + + head = &get_cpu_var(perf_pending_head); do { - prev = counter->wakeup.next = *head; - next = &counter->wakeup; - } while (cmpxchg(head, prev, next) != prev); + entry->next = *head; + } while (cmpxchg(head, entry->next, entry) != entry->next); set_perf_counter_pending(); - put_cpu_var(perf_wakeup_head); + put_cpu_var(perf_pending_head); } static int __perf_pending_run(void) { - struct perf_wakeup_entry *list; + struct perf_pending_entry *list; int nr = 0; - list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); while (list != PENDING_TAIL) { - struct perf_counter *counter = container_of(list, - struct perf_counter, wakeup); + void (*func)(struct perf_pending_entry *); + struct perf_pending_entry *entry = list; list = list->next; - counter->wakeup.next = NULL; + func = entry->func; + entry->next = NULL; /* * Ensure we observe the unqueue before we issue the wakeup, * so that we won't be waiting forever. @@ -1636,7 +1646,7 @@ static int __perf_pending_run(void) */ smp_wmb(); - perf_counter_wakeup(counter); + func(entry); nr++; } @@ -1658,7 +1668,7 @@ static inline int perf_not_pending(struct perf_counter *counter) * so that we do not miss the wakeup. -- see perf_pending_handle() */ smp_rmb(); - return counter->wakeup.next == NULL; + return counter->pending.next == NULL; } static void perf_pending_sync(struct perf_counter *counter) @@ -1695,9 +1705,10 @@ struct perf_output_handle { static inline void __perf_output_wakeup(struct perf_output_handle *handle) { - if (handle->nmi) - perf_pending_queue(handle->counter); - else + if (handle->nmi) { + perf_pending_queue(&handle->counter->pending, + perf_pending_wakeup); + } else perf_counter_wakeup(handle->counter); } -- cgit v1.2.3-70-g09d2 From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:04 +0200 Subject: perf_counter: theres more to overflow than writing events Prepare for more generic overflow handling. The new perf_counter_overflow() method will handle the generic bits of the counter overflow, and can return a !0 return value, in which case the counter should be (soft) disabled, so that it won't count until it's properly disabled. XXX: do powerpc and swcounter Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.812109629@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 +- arch/x86/kernel/cpu/perf_counter.c | 3 ++- include/linux/perf_counter.h | 4 ++-- kernel/perf_counter.c | 29 +++++++++++++++++++++++------ 4 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0a4d14f279a..f88c35d0710 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -732,7 +732,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_output(counter, 1, regs); + perf_counter_overflow(counter, 1, regs); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 438415866fe..1116a41bc7b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -800,7 +800,8 @@ again: continue; perf_save_and_restart(counter); - perf_counter_output(counter, nmi, regs); + if (perf_counter_overflow(counter, nmi, regs)) + __pmc_generic_disable(counter, &counter->hw, bit); } hw_perf_ack_status(ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 977fb15a53f..ca2d4df29e0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -491,8 +491,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_counter_context *ctx, int cpu); extern void perf_counter_update_userpage(struct perf_counter *counter); -extern void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs); +extern int perf_counter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0a2ade2e4f1..195e976eb07 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1800,8 +1800,8 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +static void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; u64 record_type = counter->hw_event.record_type; @@ -2033,6 +2033,17 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, perf_counter_mmap_event(&mmap_event); } +/* + * Generic counter overflow handling. + */ + +int perf_counter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_counter_output(counter, nmi, regs); + return 0; +} + /* * Generic software counter infrastructure */ @@ -2077,6 +2088,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter) static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) { + enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_counter *counter; struct pt_regs *regs; @@ -2092,12 +2104,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) !counter->hw_event.exclude_user) regs = task_pt_regs(current); - if (regs) - perf_counter_output(counter, 0, regs); + if (regs) { + if (perf_counter_overflow(counter, 0, regs)) + ret = HRTIMER_NORESTART; + } hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); - return HRTIMER_RESTART; + return ret; } static void perf_swcounter_overflow(struct perf_counter *counter, @@ -2105,7 +2119,10 @@ static void perf_swcounter_overflow(struct perf_counter *counter, { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - perf_counter_output(counter, nmi, regs); + if (perf_counter_overflow(counter, nmi, regs)) + /* soft-disable the counter */ + ; + } static int perf_swcounter_match(struct perf_counter *counter, -- cgit v1.2.3-70-g09d2 From 339f7c90b8a2f3aa2dd4267e79f797999e8a3c59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:06 +0200 Subject: perf_counter: PERF_RECORD_TIME By popular request, provide means to log a timestamp along with the counter overflow event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.024173282@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ca2d4df29e0..928a7fae096 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -102,6 +102,7 @@ enum perf_counter_record_format { PERF_RECORD_TID = 1U << 1, PERF_RECORD_GROUP = 1U << 2, PERF_RECORD_CALLCHAIN = 1U << 3, + PERF_RECORD_TIME = 1U << 4, }; /* @@ -221,6 +222,7 @@ enum perf_event_type { __PERF_EVENT_TID = PERF_RECORD_TID, __PERF_EVENT_GROUP = PERF_RECORD_GROUP, __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, + __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c841563de04..19990d1f021 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1826,6 +1826,7 @@ static void perf_counter_output(struct perf_counter *counter, } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; + u64 time; header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); @@ -1862,6 +1863,16 @@ static void perf_counter_output(struct perf_counter *counter, } } + if (record_type & PERF_RECORD_TIME) { + /* + * Maybe do better on x86 and provide cpu_clock_nmi() + */ + time = sched_clock(); + + header.type |= __PERF_EVENT_TIME; + header.size += sizeof(u64); + } + ret = perf_output_begin(&handle, counter, header.size, nmi); if (ret) return; @@ -1895,6 +1906,9 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) perf_output_copy(&handle, callchain, callchain_size); + if (record_type & PERF_RECORD_TIME) + perf_output_put(&handle, time); + perf_output_end(&handle); } -- cgit v1.2.3-70-g09d2 From 79f146415623fe74f39af67c0f6adc208939a410 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:07 +0200 Subject: perf_counter: counter overflow limit Provide means to auto-disable the counter after 'n' overflow events. Create the counter with hw_event.disabled = 1, and then issue an ioctl(fd, PREF_COUNTER_IOC_REFRESH, n); to set the limit and enable the counter. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.083139737@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 ++++++++--- kernel/perf_counter.c | 51 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 928a7fae096..ef4dcbff75a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -155,8 +155,9 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) /* * Structure of the page that can be mapped via mmap @@ -403,9 +404,14 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; struct fasync_struct *fasync; - /* optional: for NMIs */ + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_disable; struct perf_pending_entry pending; + atomic_t event_limit; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 19990d1f021..c05e10354bc 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -744,6 +744,12 @@ static void perf_counter_enable(struct perf_counter *counter) spin_unlock_irq(&ctx->lock); } +static void perf_counter_refresh(struct perf_counter *counter, int refresh) +{ + atomic_add(refresh, &counter->event_limit); + perf_counter_enable(counter); +} + /* * Enable a counter and all its children. */ @@ -1311,6 +1317,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_DISABLE: perf_counter_disable_family(counter); break; + case PERF_COUNTER_IOC_REFRESH: + perf_counter_refresh(counter, arg); + break; default: err = -ENOTTY; } @@ -1590,14 +1599,6 @@ void perf_counter_wakeup(struct perf_counter *counter) kill_fasync(&counter->fasync, SIGIO, POLL_IN); } -static void perf_pending_wakeup(struct perf_pending_entry *entry) -{ - struct perf_counter *counter = container_of(entry, - struct perf_counter, pending); - - perf_counter_wakeup(counter); -} - /* * Pending wakeups * @@ -1607,6 +1608,22 @@ static void perf_pending_wakeup(struct perf_pending_entry *entry) * single linked list and use cmpxchg() to add entries lockless. */ +static void perf_pending_counter(struct perf_pending_entry *entry) +{ + struct perf_counter *counter = container_of(entry, + struct perf_counter, pending); + + if (counter->pending_disable) { + counter->pending_disable = 0; + perf_counter_disable(counter); + } + + if (counter->pending_wakeup) { + counter->pending_wakeup = 0; + perf_counter_wakeup(counter); + } +} + #define PENDING_TAIL ((struct perf_pending_entry *)-1UL) static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { @@ -1715,8 +1732,9 @@ struct perf_output_handle { static inline void __perf_output_wakeup(struct perf_output_handle *handle) { if (handle->nmi) { + handle->counter->pending_wakeup = 1; perf_pending_queue(&handle->counter->pending, - perf_pending_wakeup); + perf_pending_counter); } else perf_counter_wakeup(handle->counter); } @@ -2063,8 +2081,21 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, int perf_counter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs) { + int events = atomic_read(&counter->event_limit); + int ret = 0; + + if (events && atomic_dec_and_test(&counter->event_limit)) { + ret = 1; + if (nmi) { + counter->pending_disable = 1; + perf_pending_queue(&counter->pending, + perf_pending_counter); + } else + perf_counter_disable(counter); + } + perf_counter_output(counter, nmi, regs); - return 0; + return ret; } /* -- cgit v1.2.3-70-g09d2 From 0c593b3411341e3a05a61f5527df36ab02bd11e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:08 +0200 Subject: perf_counter: comment the perf_event_type stuff Describe the event format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.211174347@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ef4dcbff75a..81220188d05 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -208,6 +208,20 @@ struct perf_event_header { enum perf_event_type { + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ PERF_EVENT_MMAP = 1, PERF_EVENT_MUNMAP = 2, @@ -217,6 +231,24 @@ enum perf_event_type { * * These events will have types of the form: * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && __PERF_EVENT_IP + * { u32 pid, tid; } && __PERF_EVENT_TID + * + * { u64 nr; + * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * + * { u64 time; } && __PERF_EVENT_TIME + * }; */ PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = PERF_RECORD_IP, -- cgit v1.2.3-70-g09d2 From 4c9e25428ff46b968a30f1dfafdba550cb6e4141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:09 +0200 Subject: perf_counter: change event definition Currently the definition of an event is slightly ambiguous. We have wakeup events, for poll() and SIGIO, which are either generated when a record crosses a page boundary (hw_events.wakeup_events == 0), or every wakeup_events new records. Now a record can be either a counter overflow record, or a number of different things, like the mmap PROT_EXEC region notifications. Then there is the PERF_COUNTER_IOC_REFRESH event limit, which only considers counter overflows. This patch changes then wakeup_events and SIGIO notification to only consider overflow events. Furthermore it changes the SIGIO notification to report SIGHUP when the event limit is reached and the counter will be disabled. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.266679874@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 81220188d05..0f5a4005048 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -439,6 +439,7 @@ struct perf_counter { /* delayed work for NMIs and such */ int pending_wakeup; + int pending_kill; int pending_disable; struct perf_pending_entry pending; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c05e10354bc..8c8eaf0625f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1596,7 +1596,11 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_unlock(); wake_up_all(&counter->waitq); - kill_fasync(&counter->fasync, SIGIO, POLL_IN); + + if (counter->pending_kill) { + kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); + counter->pending_kill = 0; + } } /* @@ -1727,6 +1731,7 @@ struct perf_output_handle { unsigned int head; int wakeup; int nmi; + int overflow; }; static inline void __perf_output_wakeup(struct perf_output_handle *handle) @@ -1741,7 +1746,7 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle) static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, - int nmi) + int nmi, int overflow) { struct perf_mmap_data *data; unsigned int offset, head; @@ -1751,8 +1756,9 @@ static int perf_output_begin(struct perf_output_handle *handle, if (!data) goto out; - handle->counter = counter; - handle->nmi = nmi; + handle->counter = counter; + handle->nmi = nmi; + handle->overflow = overflow; if (!data->nr_pages) goto fail; @@ -1816,7 +1822,7 @@ static void perf_output_end(struct perf_output_handle *handle) { int wakeup_events = handle->counter->hw_event.wakeup_events; - if (wakeup_events) { + if (handle->overflow && wakeup_events) { int events = atomic_inc_return(&handle->data->events); if (events >= wakeup_events) { atomic_sub(wakeup_events, &handle->data->events); @@ -1891,7 +1897,7 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } - ret = perf_output_begin(&handle, counter, header.size, nmi); + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -1955,7 +1961,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter, { struct perf_output_handle handle; int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0); + int ret = perf_output_begin(&handle, counter, size, 0, 0); if (ret) return; @@ -2084,8 +2090,10 @@ int perf_counter_overflow(struct perf_counter *counter, int events = atomic_read(&counter->event_limit); int ret = 0; + counter->pending_kill = POLL_IN; if (events && atomic_dec_and_test(&counter->event_limit)) { ret = 1; + counter->pending_kill = POLL_HUP; if (nmi) { counter->pending_disable = 1; perf_pending_queue(&counter->pending, -- cgit v1.2.3-70-g09d2 From 4af4998b8aa35600f4c4a4f3c3a23baca6081d02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:10 +0200 Subject: perf_counter: rework context time Since perf_counter_context is switched along with tasks, we can maintain the context time without using the task runtime clock. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.353552838@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++---- kernel/perf_counter.c | 78 +++++++++++++++++++------------------------- 2 files changed, 37 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0f5a4005048..7f5d353d78a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -477,14 +477,10 @@ struct perf_counter_context { struct task_struct *task; /* - * time_now is the current time in nanoseconds since an arbitrary - * point in the past. For per-task counters, this is based on the - * task clock, and for per-cpu counters it is based on the cpu clock. - * time_lost is an offset from the task/cpu clock, used to make it - * appear that time only passes while the context is scheduled in. + * Context clock, runs when context enabled. */ - u64 time_now; - u64 time_lost; + u64 time; + u64 timestamp; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8c8eaf0625f..84d85ab4e16 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -117,7 +117,7 @@ counter_sched_out(struct perf_counter *counter, return; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_stopped = ctx->time_now; + counter->tstamp_stopped = ctx->time; counter->hw_ops->disable(counter); counter->oncpu = -1; @@ -253,27 +253,20 @@ retry: spin_unlock_irq(&ctx->lock); } -/* - * Get the current time for this context. - * If this is a task context, we use the task's task clock, - * or for a per-cpu context, we use the cpu clock. - */ -static u64 get_context_time(struct perf_counter_context *ctx, int update) +static inline u64 perf_clock(void) { - struct task_struct *curr = ctx->task; - - if (!curr) - return cpu_clock(smp_processor_id()); - - return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime; + return cpu_clock(smp_processor_id()); } /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_counter_context *ctx, int update) +static void update_context_time(struct perf_counter_context *ctx) { - ctx->time_now = get_context_time(ctx, update) - ctx->time_lost; + u64 now = perf_clock(); + + ctx->time += now - ctx->timestamp; + ctx->timestamp = now; } /* @@ -284,15 +277,17 @@ static void update_counter_times(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; u64 run_end; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { - counter->total_time_enabled = ctx->time_now - - counter->tstamp_enabled; - if (counter->state == PERF_COUNTER_STATE_INACTIVE) - run_end = counter->tstamp_stopped; - else - run_end = ctx->time_now; - counter->total_time_running = run_end - counter->tstamp_running; - } + if (counter->state < PERF_COUNTER_STATE_INACTIVE) + return; + + counter->total_time_enabled = ctx->time - counter->tstamp_enabled; + + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + run_end = counter->tstamp_stopped; + else + run_end = ctx->time; + + counter->total_time_running = run_end - counter->tstamp_running; } /* @@ -332,7 +327,7 @@ static void __perf_counter_disable(void *info) * If it is in error state, leave it in error state. */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { - update_context_time(ctx, 1); + update_context_time(ctx); update_counter_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); @@ -426,7 +421,7 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } - counter->tstamp_running += ctx->time_now - counter->tstamp_stopped; + counter->tstamp_running += ctx->time - counter->tstamp_stopped; if (!is_software_counter(counter)) cpuctx->active_oncpu++; @@ -493,9 +488,9 @@ static void add_counter_to_ctx(struct perf_counter *counter, list_add_counter(counter, ctx); ctx->nr_counters++; counter->prev_state = PERF_COUNTER_STATE_OFF; - counter->tstamp_enabled = ctx->time_now; - counter->tstamp_running = ctx->time_now; - counter->tstamp_stopped = ctx->time_now; + counter->tstamp_enabled = ctx->time; + counter->tstamp_running = ctx->time; + counter->tstamp_stopped = ctx->time; } /* @@ -522,7 +517,7 @@ static void __perf_install_in_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - update_context_time(ctx, 1); + update_context_time(ctx); /* * Protect the list operation against NMI by disabling the @@ -648,13 +643,13 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - update_context_time(ctx, 1); + update_context_time(ctx); counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled; + counter->tstamp_enabled = ctx->time - counter->total_time_enabled; /* * If the counter is in a group and isn't the group leader, @@ -737,8 +732,8 @@ static void perf_counter_enable(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_OFF) { counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - - counter->total_time_enabled; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; } out: spin_unlock_irq(&ctx->lock); @@ -778,7 +773,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, ctx->is_active = 0; if (likely(!ctx->nr_counters)) goto out; - update_context_time(ctx, 0); + update_context_time(ctx); flags = hw_perf_save_disable(); if (ctx->nr_active) { @@ -883,12 +878,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, if (likely(!ctx->nr_counters)) goto out; - /* - * Add any time since the last sched_out to the lost time - * so it doesn't get included in the total_time_enabled and - * total_time_running measures for counters in the context. - */ - ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now; + ctx->timestamp = perf_clock(); flags = hw_perf_save_disable(); @@ -1043,8 +1033,8 @@ int perf_counter_task_enable(void) if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - - counter->total_time_enabled; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -1113,7 +1103,7 @@ static void __read(void *info) curr_rq_lock_irq_save(&flags); if (ctx->is_active) - update_context_time(ctx, 1); + update_context_time(ctx); counter->hw_ops->read(counter); update_counter_times(counter); curr_rq_unlock_irq_restore(&flags); -- cgit v1.2.3-70-g09d2 From 849691a6cd40270ff5f4a8846d5f6bf8df663ffc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:12 +0200 Subject: perf_counter: remove rq->lock usage Now that all the task runtime clock users are gone, remove the ugly rq->lock usage from perf counters, which solves the nasty deadlock seen when a software task clock counter was read from an NMI overflow context. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.531137582@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 -- kernel/perf_counter.c | 42 ++++++++++++++++-------------------------- kernel/sched.c | 20 -------------------- 3 files changed, 16 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b6d2887a5d8..080d1fd461d 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,8 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern void curr_rq_lock_irq_save(unsigned long *flags); -extern void curr_rq_unlock_irq_restore(unsigned long *flags); extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 56b7eb53d67..f4f7596f784 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -172,8 +172,7 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); counter_sched_out(counter, cpuctx, ctx); @@ -198,8 +197,7 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } @@ -319,8 +317,7 @@ static void __perf_counter_disable(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); /* * If the counter is on, turn it off. @@ -336,8 +333,7 @@ static void __perf_counter_disable(void *info) counter->state = PERF_COUNTER_STATE_OFF; } - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -515,8 +511,7 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); update_context_time(ctx); /* @@ -565,8 +560,7 @@ static void __perf_install_in_context(void *info) unlock: hw_perf_restore(perf_flags); - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -641,8 +635,7 @@ static void __perf_counter_enable(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); update_context_time(ctx); counter->prev_state = counter->state; @@ -678,8 +671,7 @@ static void __perf_counter_enable(void *info) } unlock: - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -971,7 +963,7 @@ int perf_counter_task_disable(void) if (likely(!ctx->nr_counters)) return 0; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); cpu = smp_processor_id(); perf_counter_task_sched_out(curr, cpu); @@ -992,9 +984,7 @@ int perf_counter_task_disable(void) hw_perf_restore(perf_flags); - spin_unlock(&ctx->lock); - - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); return 0; } @@ -1011,7 +1001,7 @@ int perf_counter_task_enable(void) if (likely(!ctx->nr_counters)) return 0; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); cpu = smp_processor_id(); perf_counter_task_sched_out(curr, cpu); @@ -1037,7 +1027,7 @@ int perf_counter_task_enable(void) perf_counter_task_sched_in(curr, cpu); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); return 0; } @@ -1095,12 +1085,12 @@ static void __read(void *info) struct perf_counter_context *ctx = counter->ctx; unsigned long flags; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); counter->hw_ops->read(counter); update_counter_times(counter); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); } static u64 perf_counter_read(struct perf_counter *counter) @@ -2890,7 +2880,7 @@ __perf_counter_exit_task(struct task_struct *child, * Be careful about zapping the list - IRQ/NMI context * could still be processing it: */ - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); perf_flags = hw_perf_save_disable(); cpuctx = &__get_cpu_var(perf_cpu_context); @@ -2903,7 +2893,7 @@ __perf_counter_exit_task(struct task_struct *child, child_ctx->nr_counters--; hw_perf_restore(perf_flags); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); } parent_counter = child_counter->parent; diff --git a/kernel/sched.c b/kernel/sched.c index f76e3c0188a..0de2f814fb1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -997,26 +997,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } -void curr_rq_lock_irq_save(unsigned long *flags) - __acquires(rq->lock) -{ - struct rq *rq; - - local_irq_save(*flags); - rq = cpu_rq(smp_processor_id()); - spin_lock(&rq->lock); -} - -void curr_rq_unlock_irq_restore(unsigned long *flags) - __releases(rq->lock) -{ - struct rq *rq; - - rq = cpu_rq(smp_processor_id()); - spin_unlock(&rq->lock); - local_irq_restore(*flags); -} - void task_rq_unlock_wait(struct task_struct *p) { struct rq *rq = task_rq(p); -- cgit v1.2.3-70-g09d2 From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:34 +0200 Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to sched.h Add a function to wait until some other task has been switched out at least once. This differs from wait_task_inactive() subtly, in that the latter will wait until the task has left the CPU. Signed-off-by: Markus Metzger Cc: markus.t.metzger@gmail.com Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144549.794157000@intel.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/sched.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b94f3541f67..a5b9a83065f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +extern void wait_task_context_switch(struct task_struct *p); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void wait_task_context_switch(struct task_struct *p) {} static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5d507..f91bc8141dc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2002,6 +2002,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) return 1; } +/* + * wait_task_context_switch - wait for a thread to complete at least one + * context switch. + * + * @p must not be current. + */ +void wait_task_context_switch(struct task_struct *p) +{ + unsigned long nvcsw, nivcsw, flags; + int running; + struct rq *rq; + + nvcsw = p->nvcsw; + nivcsw = p->nivcsw; + for (;;) { + /* + * The runqueue is assigned before the actual context + * switch. We need to take the runqueue lock. + * + * We could check initially without the lock but it is + * very likely that we need to take the lock in every + * iteration. + */ + rq = task_rq_lock(p, &flags); + running = task_running(rq, p); + task_rq_unlock(rq, &flags); + + if (likely(!running)) + break; + /* + * The switch count is incremented before the actual + * context switch. We thus wait for two switches to be + * sure at least one completed. + */ + if ((p->nvcsw - nvcsw) > 1) + break; + if ((p->nivcsw - nivcsw) > 1) + break; + + cpu_relax(); + } +} + /* * wait_task_inactive - wait for a thread to unschedule. * -- cgit v1.2.3-70-g09d2 From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:35 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping When a ptraced task is unlinked, we need to stop branch tracing for that task. Since the unlink is called with interrupts disabled, and we need interrupts enabled to stop branch tracing, we defer the work. Collect all branch tracing related stuff in a branch tracing context. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: Andrew Morton Cc: Peter Zijlstra Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144550.712401000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 4 - arch/x86/kernel/ptrace.c | 254 ++++++++++++++++++++++++++------------- include/linux/mm.h | 3 +- include/linux/sched.h | 9 +- mm/mlock.c | 13 +- 5 files changed, 179 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c52370f2f..2483807e06e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -458,10 +458,6 @@ struct thread_struct { /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ struct ds_context *ds_ctx; #endif /* CONFIG_X86_DS */ -#ifdef CONFIG_X86_PTRACE_BTS -/* the signal to send on a bts buffer overflow */ - unsigned int bts_ovfl_signal; -#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fe9345c967d..7c21d1e8cae 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -577,17 +578,119 @@ static int ioperm_get(struct task_struct *target, } #ifdef CONFIG_X86_PTRACE_BTS +/* + * A branch trace store context. + * + * Contexts may only be installed by ptrace_bts_config() and only for + * ptraced tasks. + * + * Contexts are destroyed when the tracee is detached from the tracer. + * The actual destruction work requires interrupts enabled, so the + * work is deferred and will be scheduled during __ptrace_unlink(). + * + * Contexts hold an additional task_struct reference on the traced + * task, as well as a reference on the tracer's mm. + * + * Ptrace already holds a task_struct for the duration of ptrace operations, + * but since destruction is deferred, it may be executed after both + * tracer and tracee exited. + */ +struct bts_context { + /* The branch trace handle. */ + struct bts_tracer *tracer; + + /* The buffer used to store the branch trace and its size. */ + void *buffer; + unsigned int size; + + /* The mm that paid for the above buffer. */ + struct mm_struct *mm; + + /* The task this context belongs to. */ + struct task_struct *task; + + /* The signal to send on a bts buffer overflow. */ + unsigned int bts_ovfl_signal; + + /* The work struct to destroy a context. */ + struct work_struct work; +}; + +static inline void alloc_bts_buffer(struct bts_context *context, + unsigned int size) +{ + void *buffer; + + buffer = alloc_locked_buffer(size); + if (buffer) { + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); + } +} + +static inline void free_bts_buffer(struct bts_context *context) +{ + if (!context->buffer) + return; + + kfree(context->buffer); + context->buffer = NULL; + + refund_locked_buffer_memory(context->mm, context->size); + context->size = 0; + + mmput(context->mm); + context->mm = NULL; +} + +static void free_bts_context_work(struct work_struct *w) +{ + struct bts_context *context; + + context = container_of(w, struct bts_context, work); + + ds_release_bts(context->tracer); + put_task_struct(context->task); + free_bts_buffer(context); + kfree(context); +} + +static inline void free_bts_context(struct bts_context *context) +{ + INIT_WORK(&context->work, free_bts_context_work); + schedule_work(&context->work); +} + +static inline struct bts_context *alloc_bts_context(struct task_struct *task) +{ + struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); + if (context) { + context->task = task; + task->bts = context; + + get_task_struct(task); + } + + return context; +} + static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; struct bts_struct bts; const unsigned char *at; int error; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; at = trace->ds.top - ((index + 1) * trace->ds.size); if ((void *)at < trace->ds.begin) @@ -596,7 +699,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (!trace->read) return -EOPNOTSUPP; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -610,13 +713,18 @@ static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; const unsigned char *at; int error, drained = 0; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; if (!trace->read) return -EOPNOTSUPP; @@ -627,9 +735,8 @@ static int ptrace_bts_drain(struct task_struct *child, for (at = trace->ds.begin; (void *)at < trace->ds.top; out++, drained++, at += trace->ds.size) { struct bts_struct bts; - int error; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -639,35 +746,18 @@ static int ptrace_bts_drain(struct task_struct *child, memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - error = ds_reset_bts(child->bts); + error = ds_reset_bts(context->tracer); if (error < 0) return error; return drained; } -static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) -{ - child->bts_buffer = alloc_locked_buffer(size); - if (!child->bts_buffer) - return -ENOMEM; - - child->bts_size = size; - - return 0; -} - -static void ptrace_bts_free_buffer(struct task_struct *child) -{ - free_locked_buffer(child->bts_buffer, child->bts_size); - child->bts_buffer = NULL; - child->bts_size = 0; -} - static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; struct ptrace_bts_config cfg; unsigned int flags = 0; @@ -677,28 +767,31 @@ static int ptrace_bts_config(struct task_struct *child, if (copy_from_user(&cfg, ucfg, sizeof(cfg))) return -EFAULT; - if (child->bts) { - ds_release_bts(child->bts); - child->bts = NULL; - } + context = child->bts; + if (!context) + context = alloc_bts_context(child); + if (!context) + return -ENOMEM; if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) return -EINVAL; - child->thread.bts_ovfl_signal = cfg.signal; return -EOPNOTSUPP; + context->bts_ovfl_signal = cfg.signal; } - if ((cfg.flags & PTRACE_BTS_O_ALLOC) && - (cfg.size != child->bts_size)) { - int error; + ds_release_bts(context->tracer); + context->tracer = NULL; - ptrace_bts_free_buffer(child); + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + free_bts_buffer(context); + if (!cfg.size) + return 0; - error = ptrace_bts_allocate_buffer(child, cfg.size); - if (error < 0) - return error; + alloc_bts_buffer(context, cfg.size); + if (!context->buffer) + return -ENOMEM; } if (cfg.flags & PTRACE_BTS_O_TRACE) @@ -707,15 +800,13 @@ static int ptrace_bts_config(struct task_struct *child, if (cfg.flags & PTRACE_BTS_O_SCHED) flags |= BTS_TIMESTAMPS; - child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - flags); - if (IS_ERR(child->bts)) { - int error = PTR_ERR(child->bts); - - ptrace_bts_free_buffer(child); - child->bts = NULL; + context->tracer = ds_request_bts(child, context->buffer, context->size, + NULL, (size_t)-1, flags); + if (unlikely(IS_ERR(context->tracer))) { + int error = PTR_ERR(context->tracer); + free_bts_buffer(context); + context->tracer = NULL; return error; } @@ -726,20 +817,25 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; const struct bts_trace *trace; struct ptrace_bts_config cfg; + context = child->bts; + if (!context) + return -ESRCH; + if (cfg_size < sizeof(cfg)) return -EIO; - trace = ds_read_bts(child->bts); + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(&cfg, 0, sizeof(cfg)); - cfg.size = trace->ds.end - trace->ds.begin; - cfg.signal = child->thread.bts_ovfl_signal; - cfg.bts_size = sizeof(struct bts_struct); + cfg.size = trace->ds.end - trace->ds.begin; + cfg.signal = context->bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); if (cfg.signal) cfg.flags |= PTRACE_BTS_O_SIGNAL; @@ -758,67 +854,56 @@ static int ptrace_bts_status(struct task_struct *child, static int ptrace_bts_clear(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - return ds_reset_bts(child->bts); + return ds_reset_bts(context->tracer); } static int ptrace_bts_size(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static void ptrace_bts_fork(struct task_struct *tsk) +static inline void ptrace_bts_fork(struct task_struct *tsk) { tsk->bts = NULL; - tsk->bts_buffer = NULL; - tsk->bts_size = 0; - tsk->thread.bts_ovfl_signal = 0; } -static void ptrace_bts_untrace(struct task_struct *child) +/* + * Called from __ptrace_unlink() after the child has been moved back + * to its original parent. + */ +static inline void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { - ds_release_bts(child->bts); + free_bts_context(child->bts); child->bts = NULL; - - /* We cannot update total_vm and locked_vm since - child's mm is already gone. But we can reclaim the - memory. */ - kfree(child->bts_buffer); - child->bts_buffer = NULL; - child->bts_size = 0; } } - -static void ptrace_bts_detach(struct task_struct *child) -{ - /* - * Ptrace_detach() races with ptrace_untrace() in case - * the child dies and is reaped by another thread. - * - * We only do the memory accounting at this point and - * leave the buffer deallocation and the bts tracer - * release to ptrace_bts_untrace() which will be called - * later on with tasklist_lock held. - */ - release_locked_buffer(child->bts_buffer, child->bts_size); -} #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_detach(struct task_struct *child) {} static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ @@ -843,7 +928,6 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - ptrace_bts_detach(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c..64d8ed2538a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,6 +13,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); -extern void release_locked_buffer(void *buffer, size_t size); +extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a5b9a83065f..52b8cd049c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -96,8 +96,8 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; struct fs_struct; +struct bts_context; /* * List of flags we want to share for kernel threads, @@ -1210,12 +1210,7 @@ struct task_struct { * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; - size_t bts_size; + struct bts_context *bts; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ diff --git a/mm/mlock.c b/mm/mlock.c index cbe9e0581b7..749383b442c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -660,21 +660,20 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void release_locked_buffer(void *buffer, size_t size) +void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); - current->mm->total_vm -= pgsz; - current->mm->locked_vm -= pgsz; + mm->total_vm -= pgsz; + mm->locked_vm -= pgsz; - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); } void free_locked_buffer(void *buffer, size_t size) { - release_locked_buffer(buffer, size); - + refund_locked_buffer_memory(current->mm, size); kfree(buffer); } -- cgit v1.2.3-70-g09d2 From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:48 +0200 Subject: x86, ptrace: add bts context unconditionally Add the ptrace bts context field to task_struct unconditionally. Initialize the field directly in copy_process(). Remove all the unneeded functionality used to initialize that field. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144603.292754000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 9 ++++----- arch/x86/kernel/ptrace.c | 20 +------------------- include/linux/ptrace.h | 10 ---------- include/linux/sched.h | 2 -- kernel/fork.c | 4 ++-- kernel/ptrace.c | 10 ---------- 6 files changed, 7 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index e304b66abee..5cdd19f20b5 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -extern void x86_ptrace_untrace(struct task_struct *); -extern void x86_ptrace_fork(struct task_struct *child, - unsigned long clone_flags); +#ifdef CONFIG_X86_PTRACE_BTS +extern void ptrace_bts_untrace(struct task_struct *tsk); -#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) -#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) +#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) +#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index adbb24322d8..b32a8ee5338 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -887,37 +887,19 @@ static int ptrace_bts_size(struct task_struct *child) return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static inline void ptrace_bts_fork(struct task_struct *tsk) -{ - tsk->bts = NULL; -} - /* * Called from __ptrace_unlink() after the child has been moved back * to its original parent. */ -static inline void ptrace_bts_untrace(struct task_struct *child) +void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { free_bts_context(child->bts); child->bts = NULL; } } -#else -static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ -void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - ptrace_bts_fork(child); -} - -void x86_ptrace_untrace(struct task_struct *child) -{ - ptrace_bts_untrace(child); -} - /* * Called by kernel/ptrace.c when detaching.. * diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc2..59e133d39d5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). - */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/sched.h b/include/linux/sched.h index 52b8cd049c2..451186a22ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,13 +1205,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ struct bts_context *bts; -#endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; diff --git a/kernel/fork.c b/kernel/fork.c index 660c2b8765b..69bde7a22e9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1086,8 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(current->ptrace)) - ptrace_fork(p, clone_flags); + + p->bts = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec3419..321127d965c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -26,16 +26,6 @@ #include -/* - * Initialize a new task whose father had been ptraced. - * - * Called from copy_process(). - */ -void ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - arch_ptrace_fork(child, clone_flags); -} - /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. -- cgit v1.2.3-70-g09d2 From 06f409d76f1d382167eb1cadde2e23a73272865d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2009 18:10:13 +0100 Subject: ASoC: Provide core support for symmetric sample rates Many devices require symmetric configurations of capture and playback data formats, often due to shared clocking but sometimes also due to other shared playback and record configuration in the device. Start providing core support for this by allowing the DAIs or the machine to specify that the sample rates used should be kept symmetric. A flag symmetric_rates is provided in the snd_soc_dai and snd_soc_dai_link structures. If this is set in either of the DAIs or in the machine then a constraint will be applied when a stream is already open preventing any changes in sample rate. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 + include/sound/soc.h | 6 ++++++ sound/soc/soc-core.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 13676472ddf..22b729fbbf8 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -208,6 +208,7 @@ struct snd_soc_dai { /* DAI capabilities */ struct snd_soc_pcm_stream capture; struct snd_soc_pcm_stream playback; + unsigned int symmetric_rates:1; /* DAI runtime info */ struct snd_pcm_runtime *runtime; diff --git a/include/sound/soc.h b/include/sound/soc.h index a40bc6f316f..b1f2f8819fe 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -417,6 +417,12 @@ struct snd_soc_dai_link { /* codec/machine specific init - e.g. add machine controls */ int (*init)(struct snd_soc_codec *codec); + /* Symmetry requirements */ + unsigned int symmetric_rates:1; + + /* Symmetry data - only valid if symmetry is being enforced */ + unsigned int rate; + /* DAI pcm */ struct snd_pcm *pcm; }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 99712f652d0..dd28009f896 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -113,6 +113,35 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec) } #endif +static int soc_pcm_apply_symmetry(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card = socdev->card; + struct snd_soc_dai_link *machine = rtd->dai; + struct snd_soc_dai *cpu_dai = machine->cpu_dai; + struct snd_soc_dai *codec_dai = machine->codec_dai; + int ret; + + if (codec_dai->symmetric_rates || cpu_dai->symmetric_rates || + machine->symmetric_rates) { + dev_dbg(card->dev, "Symmetry forces %dHz rate\n", + machine->rate); + + ret = snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + machine->rate, + machine->rate); + if (ret < 0) { + dev_err(card->dev, + "Unable to apply rate symmetry constraint: %d\n", ret); + return ret; + } + } + + return 0; +} + /* * Called by ALSA when a PCM substream is opened, the runtime->hw record is * then initialized and any private data can be allocated. This also calls @@ -221,6 +250,13 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) goto machine_err; } + /* Symmetry only applies if we've already got an active stream. */ + if (cpu_dai->active || codec_dai->active) { + ret = soc_pcm_apply_symmetry(substream); + if (ret != 0) + goto machine_err; + } + pr_debug("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name); pr_debug("asoc: rate mask 0x%x\n", runtime->hw.rates); pr_debug("asoc: min ch %d max ch %d\n", runtime->hw.channels_min, @@ -521,6 +557,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, } } + machine->rate = params_rate(params); + out: mutex_unlock(&pcm_mutex); return ret; -- cgit v1.2.3-70-g09d2 From 44bc9dc729e33a4ec6ebed4d0b6c08e8d20b42cf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:47:17 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, cleanup Andrew Morton noticed that mm.h needlessly includes sched.h - remove it. Reported-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64d8ed2538a..776b641f37e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,7 +13,6 @@ #include #include #include -#include struct mempolicy; struct anon_vma; -- cgit v1.2.3-70-g09d2 From a34b50ddc265bae058c66661b096ef6384c5a8b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:56:54 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, remove dead code Remove the unused free_locked_buffer() API. Signed-off-by: Ingo Molnar --- include/linux/mm.h | 1 - mm/mlock.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 776b641f37e..a3963ba23a6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1320,7 +1320,6 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); -extern void free_locked_buffer(void *buffer, size_t size); extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 749383b442c..28be15ead9c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -671,9 +671,3 @@ void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) up_write(&mm->mmap_sem); } - -void free_locked_buffer(void *buffer, size_t size) -{ - refund_locked_buffer_memory(current->mm, size); - kfree(buffer); -} -- cgit v1.2.3-70-g09d2 From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:21 -0500 Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes Add a hwdev argument that is needed on some architectures in order to access a per-device offset that is taken into account when producing a physical address (also needed to get from bus address to virtual address because the physical address is an intermediate step). Also make swiotlb_bus_to_virt weak so architectures can override it. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb.c | 2 +- include/linux/swiotlb.h | 3 ++- lib/swiotlb.c | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 34f12e9996e..887388a1c57 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ac9ff54f7cb..cb1a6631b8f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, + dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d912f068145..bffe6d7ef9d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } @@ -140,9 +140,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); } -static void *swiotlb_bus_to_virt(dma_addr_t address) +void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address) { - return phys_to_virt(swiotlb_bus_to_phys(address)); + return phys_to_virt(swiotlb_bus_to_phys(hwdev, address)); } int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, @@ -691,7 +691,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -728,7 +728,7 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); -- cgit v1.2.3-70-g09d2 From 6fab01927e8bdbbc77bafba2abb4810c5591ad52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:26 +0200 Subject: perf_counter: provide misc bits in the event header Limit the size of each record to 64k (or should we count in multiples of u64 and have a 512K limit?), this gives 16 bits or spare room in the header, which we can use for misc bits, so as to not have to grow the record with u64 every time we have a few bits to report. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.769271806@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 +++++- kernel/perf_counter.c | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7f5d353d78a..5bd8817b12d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,9 +201,13 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) + struct perf_event_header { __u32 type; - __u32 size; + __u16 misc; + __u16 size; }; enum perf_event_type { diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 84a39081344..4af98f943d3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1831,6 +1831,9 @@ static void perf_counter_output(struct perf_counter *counter, header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); + header.misc = user_mode(regs) ? + PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; + if (record_type & PERF_RECORD_IP) { ip = instruction_pointer(regs); header.type |= __PERF_EVENT_IP; -- cgit v1.2.3-70-g09d2 From 6b6e5486b3a168f0328c82a8d4376caf901472b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:27 +0200 Subject: perf_counter: use misc field to widen type Push the PERF_EVENT_COUNTER_OVERFLOW bit into the misc field so that we can have the full 32bit for PERF_RECORD_ bits. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.891867663@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 28 ++++++++++------------------ kernel/perf_counter.c | 15 ++++++++------- 2 files changed, 18 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5bd8817b12d..4809ae18a94 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,8 +201,9 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -230,36 +231,27 @@ enum perf_event_type { PERF_EVENT_MUNMAP = 2, /* - * Half the event type space is reserved for the counter overflow - * bitfields, as found in hw_event.record_type. - * - * These events will have types of the form: - * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * will be PERF_RECORD_* * * struct { * struct perf_event_header header; * - * { u64 ip; } && __PERF_EVENT_IP - * { u32 pid, tid; } && __PERF_EVENT_TID + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID * * { u64 nr; - * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP * * { u16 nr, * hv, * kernel, * user; - * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN * - * { u64 time; } && __PERF_EVENT_TIME + * { u64 time; } && PERF_RECORD_TIME * }; */ - PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = PERF_RECORD_IP, - __PERF_EVENT_TID = PERF_RECORD_TID, - __PERF_EVENT_GROUP = PERF_RECORD_GROUP, - __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, - __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4af98f943d3..bf12df6f353 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1828,15 +1828,16 @@ static void perf_counter_output(struct perf_counter *counter, int callchain_size = 0; u64 time; - header.type = PERF_EVENT_COUNTER_OVERFLOW; + header.type = 0; header.size = sizeof(header); - header.misc = user_mode(regs) ? + header.misc = PERF_EVENT_MISC_OVERFLOW; + header.misc |= user_mode(regs) ? PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; if (record_type & PERF_RECORD_IP) { ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; + header.type |= PERF_RECORD_IP; header.size += sizeof(ip); } @@ -1845,12 +1846,12 @@ static void perf_counter_output(struct perf_counter *counter, tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; - header.type |= __PERF_EVENT_TID; + header.type |= PERF_RECORD_TID; header.size += sizeof(tid_entry); } if (record_type & PERF_RECORD_GROUP) { - header.type |= __PERF_EVENT_GROUP; + header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } @@ -1861,7 +1862,7 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); - header.type |= __PERF_EVENT_CALLCHAIN; + header.type |= PERF_RECORD_CALLCHAIN; header.size += callchain_size; } } @@ -1872,7 +1873,7 @@ static void perf_counter_output(struct perf_counter *counter, */ time = sched_clock(); - header.type |= __PERF_EVENT_TIME; + header.type |= PERF_RECORD_TIME; header.size += sizeof(u64); } -- cgit v1.2.3-70-g09d2 From 8740f9418c78dcad694b46ab25d1645d5aef1f5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:29 +0200 Subject: perf_counter: add some comments Add a few comments because I was forgetting what field what for what functionality. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.036984214@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4809ae18a94..8bf764fc622 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -344,10 +344,12 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; - int nr_pages; - atomic_t wakeup; - atomic_t head; - atomic_t events; + int nr_pages; /* nr of data pages */ + + atomic_t wakeup; /* POLL_ for wakeups */ + atomic_t head; /* write position */ + atomic_t events; /* event limit */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v1.2.3-70-g09d2 From 8d1b2d9361b494bfc761700c348c65ebbe3deb5b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:30 +0200 Subject: perf_counter: track task-comm data Similar to the mmap data stream, add one that tracks the task COMM field, so that the userspace reporting knows what to call a task. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.127422406@chello.nl> Signed-off-by: Ingo Molnar --- fs/exec.c | 1 + include/linux/perf_counter.h | 16 +++++++- kernel/perf_counter.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index e015c0b5a08..bf47ed0278f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -951,6 +951,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); + perf_counter_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bf764fc622..a70a55f2759 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -142,8 +142,9 @@ struct perf_counter_hw_event { exclude_idle : 1, /* don't count when idle */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + comm : 1, /* include comm data */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ @@ -230,6 +231,16 @@ enum perf_event_type { PERF_EVENT_MMAP = 1, PERF_EVENT_MUNMAP = 2, + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_EVENT_COMM = 3, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -545,6 +556,8 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +extern void perf_counter_comm(struct task_struct *tsk); + #define MAX_STACK_DEPTH 255 struct perf_callchain_entry { @@ -583,6 +596,7 @@ static inline void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } +static inline void perf_counter_comm(struct task_struct *tsk) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index bf12df6f353..2d4aebb2982 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1916,6 +1916,99 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_end(&handle); } +/* + * comm tracking + */ + +struct perf_comm_event { + struct task_struct *task; + char *comm; + int comm_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + } event; +}; + +static void perf_counter_comm_output(struct perf_counter *counter, + struct perf_comm_event *comm_event) +{ + struct perf_output_handle handle; + int size = comm_event->event.header.size; + int ret = perf_output_begin(&handle, counter, size, 0, 0); + + if (ret) + return; + + perf_output_put(&handle, comm_event->event); + perf_output_copy(&handle, comm_event->comm, + comm_event->comm_size); + perf_output_end(&handle); +} + +static int perf_counter_comm_match(struct perf_counter *counter, + struct perf_comm_event *comm_event) +{ + if (counter->hw_event.comm && + comm_event->event.header.type == PERF_EVENT_COMM) + return 1; + + return 0; +} + +static void perf_counter_comm_ctx(struct perf_counter_context *ctx, + struct perf_comm_event *comm_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_comm_match(counter, comm_event)) + perf_counter_comm_output(counter, comm_event); + } + rcu_read_unlock(); +} + +static void perf_counter_comm_event(struct perf_comm_event *comm_event) +{ + struct perf_cpu_context *cpuctx; + unsigned int size; + char *comm = comm_event->task->comm; + + size = ALIGN(strlen(comm), sizeof(u64)); + + comm_event->comm = comm; + comm_event->comm_size = size; + + comm_event->event.header.size = sizeof(comm_event->event) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_comm_ctx(&cpuctx->ctx, comm_event); + put_cpu_var(perf_cpu_context); + + perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); +} + +void perf_counter_comm(struct task_struct *task) +{ + struct perf_comm_event comm_event = { + .task = task, + .event = { + .header = { .type = PERF_EVENT_COMM, }, + .pid = task->group_leader->pid, + .tid = task->pid, + }, + }; + + perf_counter_comm_event(&comm_event); +} + /* * mmap tracking */ -- cgit v1.2.3-70-g09d2 From 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:32 +0200 Subject: perf_counter: move PERF_RECORD_TIME Move PERF_RECORD_TIME so that all the fixed length items come before the variable length ones. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.307926436@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 ++++----- kernel/perf_counter.c | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a70a55f2759..8bd1be58c93 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -100,9 +100,9 @@ enum sw_event_ids { enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, - PERF_RECORD_GROUP = 1U << 2, - PERF_RECORD_CALLCHAIN = 1U << 3, - PERF_RECORD_TIME = 1U << 4, + PERF_RECORD_TIME = 1U << 2, + PERF_RECORD_GROUP = 1U << 3, + PERF_RECORD_CALLCHAIN = 1U << 4, }; /* @@ -250,6 +250,7 @@ enum perf_event_type { * * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -259,8 +260,6 @@ enum perf_event_type { * kernel, * user; * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN - * - * { u64 time; } && PERF_RECORD_TIME * }; */ }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2d4aebb2982..4dc8600d282 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1850,6 +1850,16 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(tid_entry); } + if (record_type & PERF_RECORD_TIME) { + /* + * Maybe do better on x86 and provide cpu_clock_nmi() + */ + time = sched_clock(); + + header.type |= PERF_RECORD_TIME; + header.size += sizeof(u64); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -1867,16 +1877,6 @@ static void perf_counter_output(struct perf_counter *counter, } } - if (record_type & PERF_RECORD_TIME) { - /* - * Maybe do better on x86 and provide cpu_clock_nmi() - */ - time = sched_clock(); - - header.type |= PERF_RECORD_TIME; - header.size += sizeof(u64); - } - ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -1889,6 +1889,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_TID) perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_TIME) + perf_output_put(&handle, time); + if (record_type & PERF_RECORD_GROUP) { struct perf_counter *leader, *sub; u64 nr = counter->nr_siblings; @@ -1910,9 +1913,6 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) perf_output_copy(&handle, callchain, callchain_size); - if (record_type & PERF_RECORD_TIME) - perf_output_put(&handle, time); - perf_output_end(&handle); } -- cgit v1.2.3-70-g09d2 From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:33 +0200 Subject: perf_counter: allow for data addresses to be recorded Paul suggested we allow for data addresses to be recorded along with the traditional IPs as power can provide these. For now, only the software pagefault events provide data addresses, but in the future power might as well for some events. x86 doesn't seem capable of providing this atm. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.394816925@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 +- arch/powerpc/mm/fault.c | 8 ++++--- arch/x86/kernel/cpu/perf_counter.c | 2 +- arch/x86/mm/fault.c | 8 ++++--- include/linux/perf_counter.h | 14 +++++++----- kernel/perf_counter.c | 46 ++++++++++++++++++++++++-------------- 6 files changed, 49 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0697ade84dd..c9d019f1907 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_overflow(counter, 1, regs); + perf_counter_overflow(counter, 1, regs, 0); } /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 17bbf6f91fb..ac0e112031b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,8 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -322,7 +323,8 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } up_read(&mm->mmap_sem); return 0; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1116a41bc7b..0fcbaab83f9 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -800,7 +800,7 @@ again: continue; perf_save_and_restart(counter); - if (perf_counter_overflow(counter, nmi, regs)) + if (perf_counter_overflow(counter, nmi, regs, 0)) __pmc_generic_disable(counter, &counter->hw, bit); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f2d3324d921..6f9df2babe4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1142,10 +1142,12 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } check_v8086_mode(regs, address, tsk); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bd1be58c93..c22363a4f74 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -101,8 +101,9 @@ enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_GROUP = 1U << 3, - PERF_RECORD_CALLCHAIN = 1U << 4, + PERF_RECORD_ADDR = 1U << 3, + PERF_RECORD_GROUP = 1U << 4, + PERF_RECORD_CALLCHAIN = 1U << 5, }; /* @@ -251,6 +252,7 @@ enum perf_event_type { * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); extern int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs); + int nmi, struct pt_regs *regs, u64 addr); /* * Return 1 for a software counter, 0 for a hardware counter */ @@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter) perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); extern void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); @@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } - +perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } static inline void perf_counter_mmap(unsigned long addr, unsigned long len, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4dc8600d282..321c57e3556 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) update_context_time(ctx); regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); + perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; @@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle) } static void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int ret; u64 record_type = counter->hw_event.record_type; @@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } + if (record_type & PERF_RECORD_ADDR) { + header.type |= PERF_RECORD_ADDR; + header.size += sizeof(u64); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_TIME) perf_output_put(&handle, time); + if (record_type & PERF_RECORD_ADDR) + perf_output_put(&handle, addr); + if (record_type & PERF_RECORD_GROUP) { struct perf_counter *leader, *sub; u64 nr = counter->nr_siblings; @@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, */ int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int events = atomic_read(&counter->event_limit); int ret = 0; @@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter, perf_counter_disable(counter); } - perf_counter_output(counter, nmi, regs); + perf_counter_output(counter, nmi, regs, addr); return ret; } @@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) regs = task_pt_regs(current); if (regs) { - if (perf_counter_overflow(counter, 0, regs)) + if (perf_counter_overflow(counter, 0, regs, 0)) ret = HRTIMER_NORESTART; } @@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) } static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, regs)) + if (perf_counter_overflow(counter, nmi, regs, addr)) /* soft-disable the counter */ ; @@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter, } static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int neg = atomic64_add_negative(nr, &counter->hw.count); if (counter->hw.irq_period && !neg) - perf_swcounter_overflow(counter, nmi, regs); + perf_swcounter_overflow(counter, nmi, regs, addr); } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum perf_event_types type, u32 event, - u64 nr, int nmi, struct pt_regs *regs) + u64 nr, int nmi, struct pt_regs *regs, + u64 addr) { struct perf_counter *counter; @@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { if (perf_swcounter_match(counter, type, event, regs)) - perf_swcounter_add(counter, nr, nmi, regs); + perf_swcounter_add(counter, nr, nmi, regs, addr); } rcu_read_unlock(); } @@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) } static void __perf_swcounter_event(enum perf_event_types type, u32 event, - u64 nr, int nmi, struct pt_regs *regs) + u64 nr, int nmi, struct pt_regs *regs, + u64 addr) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); int *recursion = perf_swcounter_recursion_context(cpuctx); @@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event, (*recursion)++; barrier(); - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); + perf_swcounter_ctx_event(&cpuctx->ctx, type, event, + nr, nmi, regs, addr); if (cpuctx->task_ctx) { perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, - nr, nmi, regs); + nr, nmi, regs, addr); } barrier(); @@ -2349,9 +2360,10 @@ out: put_cpu_var(perf_cpu_context); } -void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) +void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); + __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); } static void perf_swcounter_read(struct perf_counter *counter) @@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id) if (!regs) regs = task_pt_regs(current); - __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); + __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); } extern int ftrace_profile_enable(int); -- cgit v1.2.3-70-g09d2 From 1c1452be2e9ae282a7316c3b23987811bd7acda6 Mon Sep 17 00:00:00 2001 From: Jonas Larsson Date: Tue, 31 Mar 2009 11:16:48 +0200 Subject: atmel-mci: Add support for inverted detect pin Same patch as before, modified to use bool. Also adds description of the new field in struct atmel_mci that I missed in the first patch. This patch adds Atmel MCI support for inverted detect pins. Signed-off-by: Jonas Larsson Acked-by: Pierre Ossman Signed-off-by: Haavard Skinnemoen --- drivers/mmc/host/atmel-mci.c | 12 +++++++++--- include/linux/atmel-mci.h | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index cf6a100bb38..7b603e4b41d 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -177,6 +177,7 @@ struct atmel_mci { * available. * @wp_pin: GPIO pin used for card write protect sending, or negative * if not available. + * @detect_is_active_high: The state of the detect pin when it is active. * @detect_timer: Timer used for debouncing @detect_pin interrupts. */ struct atmel_mci_slot { @@ -196,6 +197,7 @@ struct atmel_mci_slot { int detect_pin; int wp_pin; + bool detect_is_active_high; struct timer_list detect_timer; }; @@ -924,7 +926,8 @@ static int atmci_get_cd(struct mmc_host *mmc) struct atmel_mci_slot *slot = mmc_priv(mmc); if (gpio_is_valid(slot->detect_pin)) { - present = !gpio_get_value(slot->detect_pin); + present = !(gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high); dev_dbg(&mmc->class_dev, "card is %spresent\n", present ? "" : "not "); } @@ -1028,7 +1031,8 @@ static void atmci_detect_change(unsigned long data) return; enable_irq(gpio_to_irq(slot->detect_pin)); - present = !gpio_get_value(slot->detect_pin); + present = !(gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high); present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags); dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n", @@ -1456,6 +1460,7 @@ static int __init atmci_init_slot(struct atmel_mci *host, slot->host = host; slot->detect_pin = slot_data->detect_pin; slot->wp_pin = slot_data->wp_pin; + slot->detect_is_active_high = slot_data->detect_is_active_high; slot->sdc_reg = sdc_reg; mmc->ops = &atmci_ops; @@ -1477,7 +1482,8 @@ static int __init atmci_init_slot(struct atmel_mci *host, if (gpio_request(slot->detect_pin, "mmc_detect")) { dev_dbg(&mmc->class_dev, "no detect pin available\n"); slot->detect_pin = -EBUSY; - } else if (gpio_get_value(slot->detect_pin)) { + } else if (gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high) { clear_bit(ATMCI_CARD_PRESENT, &slot->flags); } } diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 2f1f95737ac..57b1846a3c8 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -10,6 +10,7 @@ * @bus_width: Number of data lines wired up the slot * @detect_pin: GPIO pin wired to the card detect switch * @wp_pin: GPIO pin wired to the write protect sensor + * @detect_is_active_high: The state of the detect pin when it is active * * If a given slot is not present on the board, @bus_width should be * set to 0. The other fields are ignored in this case. @@ -24,6 +25,7 @@ struct mci_slot_pdata { unsigned int bus_width; int detect_pin; int wp_pin; + bool detect_is_active_high; }; /** -- cgit v1.2.3-70-g09d2 From 1ccd15497869f3ed83b5225d410df53a96e52757 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Apr 2009 10:53:45 +0200 Subject: perf_counter: sysctl for system wide perf counters Impact: add sysctl for paranoid/relaxed perfcounters policy Allow the use of system wide perf counters to everybody, but provide a sysctl to disable it for the paranoid security minded. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090409085524.514046352@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 4 +++- kernel/sysctl.c | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c22363a4f74..98143288530 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -568,6 +568,8 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +extern int sysctl_perf_counter_priv; + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 76376ecb23b..7efb7ebaaae 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -42,6 +42,8 @@ static atomic_t nr_mmap_tracking __read_mostly; static atomic_t nr_munmap_tracking __read_mostly; static atomic_t nr_comm_tracking __read_mostly; +int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ + /* * Mutex for (sysadmin-configurable) counter reservations: */ @@ -1132,7 +1134,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) */ if (cpu != -1) { /* Must be root to operate on a CPU counter: */ - if (!capable(CAP_SYS_ADMIN)) + if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); if (cpu < 0 || cpu > num_possible_cpus()) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4286b62b34a..8ba457838d9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -920,6 +921,16 @@ static struct ctl_table kern_table[] = { .child = slow_work_sysctls, }, #endif +#ifdef CONFIG_PERF_COUNTERS + { + .ctl_name = CTL_UNNUMBERED, + .procname = "perf_counter_privileged", + .data = &sysctl_perf_counter_priv, + .maxlen = sizeof(sysctl_perf_counter_priv), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt -- cgit v1.2.3-70-g09d2 From 2062501ae6505dbc5bff3a792246c2661d114050 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Apr 2009 01:49:33 +0200 Subject: tracing/lockdep: report the time waited for a lock While trying to optimize the new lock on reiserfs to replace the bkl, I find the lock tracing very useful though it lacks something important for performance (and latency) instrumentation: the time a task waits for a lock. That's what this patch implements: bash-4816 [000] 202.652815: lock_contended: lock_contended: &sb->s_type->i_mutex_key bash-4816 [000] 202.652819: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652825: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652829: lock_acquired: &rq->lock (0.000 us) bash-4816 [000] 202.652833: lock_acquired: &sb->s_type->i_mutex_key (16.005 us) As shown above, the "lock acquired" field is followed by the time it has been waiting for the lock. Usually, a lock contended entry is followed by a near lock_acquired entry with a non-zero time waited. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1238975373-15739-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/lockdep_event_types.h | 23 ++++++++++++++++++----- kernel/lockdep.c | 8 ++++---- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index adccfcd2ec8..863f1e4583a 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -32,11 +32,24 @@ TRACE_FORMAT(lock_contended, TP_FMT("%s", lock->name) ); -TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b0f01186696..c4582a6ea95 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3061,6 +3061,8 @@ found_it: put_lock_stats(stats); } +DEFINE_TRACE(lock_acquired); + static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { @@ -3099,6 +3101,8 @@ found_it: hlock->holdtime_stamp = now; } + trace_lock_acquired(lock, ip, waittime); + stats = get_lock_stats(hlock_class(hlock)); if (waittime) { if (hlock->read) @@ -3137,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -DEFINE_TRACE(lock_acquired); - void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_acquired(lock, ip); - if (unlikely(!lock_stat)) return; -- cgit v1.2.3-70-g09d2 From 5cb3d1d9d34ac04bcaa2034139345b2a5fea54c1 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Thu, 9 Apr 2009 14:08:18 +0800 Subject: tracing, net, skb tracepoint: make skb tracepoint use the TRACE_EVENT() macro TRACE_EVENT is a more generic way to define a tracepoint. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Neil Horman Cc: "David S. Miller" Cc: Arnaldo Carvalho de Melo Cc: "Steven Rostedt ;" Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DD90D2.5020604@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/skb.h | 4 +--- include/trace/skb_event_types.h | 38 ++++++++++++++++++++++++++++++++++++++ include/trace/trace_event_types.h | 1 + include/trace/trace_events.h | 1 + 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 include/trace/skb_event_types.h (limited to 'include') diff --git a/include/trace/skb.h b/include/trace/skb.h index b66206d9be7..d2de7174a6e 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -4,8 +4,6 @@ #include #include -DECLARE_TRACE(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location)); +#include #endif diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h new file mode 100644 index 00000000000..4a1c504c0e1 --- /dev/null +++ b/include/trace/skb_event_types.h @@ -0,0 +1,38 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index df56f5694be..33b6bfcba93 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -3,3 +3,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index fd13750ca4b..0e2aa80076d 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -3,3 +3,4 @@ #include #include #include +#include -- cgit v1.2.3-70-g09d2 From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:26:18 +0800 Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace part and tracepoint part Impact: refactor code for future changes Current kmemtrace.h is used both as header file of kmemtrace and kmem's tracepoints definition. Tracepoints' definition file may be used by other code, and should only have definition of tracepoint. We can separate include/trace/kmemtrace.h into 2 files: include/linux/kmemtrace.h: header file for kmemtrace include/trace/kmem.h: definition of kmem tracepoints Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/kmemtrace.h | 25 +++++++++++++++++++ include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/trace/kmem.h | 44 +++++++++++++++++++++++++++++++++ include/trace/kmemtrace.h | 63 ----------------------------------------------- init/main.c | 2 +- kernel/trace/kmemtrace.c | 2 +- kernel/trace/trace.h | 2 +- mm/slab.c | 2 +- mm/slob.c | 2 +- mm/slub.c | 2 +- 11 files changed, 77 insertions(+), 71 deletions(-) create mode 100644 include/linux/kmemtrace.h create mode 100644 include/trace/kmem.h delete mode 100644 include/trace/kmemtrace.h (limited to 'include') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 00000000000..15c45a27a92 --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include + +#ifdef CONFIG_KMEMTRACE +extern void kmemtrace_init(void); +#else +static inline void kmemtrace_init(void) +{ +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5ac9b0bcaf9..713f841ecaa 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 5046f90c117..be5d40c43bd 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmem.h b/include/trace/kmem.h new file mode 100644 index 00000000000..24d25192818 --- /dev/null +++ b/include/trace/kmem.h @@ -0,0 +1,44 @@ +#ifndef _TRACE_KMEM_H +#define _TRACE_KMEM_H + +#include +#include + +DECLARE_TRACE(kmalloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmem_cache_alloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmalloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kmem_cache_alloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kfree, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); +DECLARE_TRACE(kmem_cache_free, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); + +#endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h deleted file mode 100644 index 28ee69f9cd4..00000000000 --- a/include/trace/kmemtrace.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/init/main.c b/init/main.c index 3585f073d63..eece40cd8a6 100644 --- a/init/main.c +++ b/init/main.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include #include -#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 5011f4d91e3..7a0aa0e260d 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "trace_output.h" #include "trace.h" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f76a8f8689d..34b94c3f40a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include enum trace_type { diff --git a/mm/slab.c b/mm/slab.c index 9a90b00d2f9..f85831da908 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -102,7 +102,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/slob.c b/mm/slob.c index a2d4ab32198..494f05f1941 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include /* diff --git a/mm/slub.c b/mm/slub.c index 7ab54ecbd3f..ea9e7160e2e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From fc182a4330fc22ea1b68fa3d5064dd85a73a4c4a Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:27:38 +0800 Subject: tracing, kmemtrace: Make kmem tracepoints use TRACE_EVENT macro TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE6DA.80600@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/kmem.h | 39 +------- include/trace/kmem_event_types.h | 193 ++++++++++++++++++++++++++++++++++++++ include/trace/trace_event_types.h | 1 + include/trace/trace_events.h | 1 + 4 files changed, 197 insertions(+), 37 deletions(-) create mode 100644 include/trace/kmem_event_types.h (limited to 'include') diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 24d25192818..46efc2423f0 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,44 +1,9 @@ #ifndef _TRACE_KMEM_H #define _TRACE_KMEM_H -#include #include +#include -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); +#include #endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h new file mode 100644 index 00000000000..4ff420fe467 --- /dev/null +++ b/include/trace/kmem_event_types.h @@ -0,0 +1,193 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index 33b6bfcba93..552a50e169a 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -4,3 +4,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 0e2aa80076d..13d6b85668c 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -4,3 +4,4 @@ #include #include #include +#include -- cgit v1.2.3-70-g09d2 From f6d655a6e6974e474a11b25052c29d10b80814b3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 11:27:03 +0100 Subject: ASoC: Support DAPM events for DACs and ADCs Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 10 ++++++++++ sound/soc/soc-dapm.c | 16 ++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index a7def6a9a03..fcc929da033 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -140,9 +140,19 @@ #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} /* generic register modifier widget */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 713d1258670..a6d73379ab3 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -598,18 +598,22 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, if (w->id == snd_soc_dapm_adc && w->active) { in = is_connected_input_ep(w); dapm_clear_walk(w->codec); - w->power = (in != 0) ? 1 : 0; - dapm_update_bits(w); - return 0; + power = (in != 0) ? 1 : 0; + if (power == w->power) + return 0; + w->power = power; + return dapm_generic_apply_power(w); } /* active DAC */ if (w->id == snd_soc_dapm_dac && w->active) { out = is_connected_output_ep(w); dapm_clear_walk(w->codec); - w->power = (out != 0) ? 1 : 0; - dapm_update_bits(w); - return 0; + power = (out != 0) ? 1 : 0; + if (power == w->power) + return 0; + w->power = power; + return dapm_generic_apply_power(w); } /* pre and post event widgets */ -- cgit v1.2.3-70-g09d2 From fa1b47dd85453ec7d4bcfe4aa4a2d172ba452fc3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Apr 2009 00:09:41 -0400 Subject: ring-buffer: add ring_buffer_discard_commit The ring_buffer_discard_commit is similar to ring_buffer_event_discard but it can only be done on an event that has yet to be commited. Unpredictable results can happen otherwise. The main difference between ring_buffer_discard_commit and ring_buffer_event_discard is that ring_buffer_discard_commit will try to free the data in the ring buffer if nothing has addded data after the reserved event. If something did, then it acts almost the same as ring_buffer_event_discard followed by a ring_buffer_unlock_commit. Note, either ring_buffer_commit_discard and ring_buffer_unlock_commit can be called on an event, not both. This commit also exports both discard functions to be usable by GPL modules. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 29 ++++++++++ kernel/trace/ring_buffer.c | 125 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 133 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e1b7b217388..f0aa486d131 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -68,8 +68,37 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +/* + * ring_buffer_event_discard can discard any event in the ring buffer. + * it is up to the caller to protect against a reader from + * consuming it or a writer from wrapping and replacing it. + * + * No external protection is needed if this is called before + * the event is commited. But in that case it would be better to + * use ring_buffer_discard_commit. + * + * Note, if an event that has not been committed is discarded + * with ring_buffer_event_discard, it must still be committed. + */ void ring_buffer_event_discard(struct ring_buffer_event *event); +/* + * ring_buffer_discard_commit will remove an event that has not + * ben committed yet. If this is used, then ring_buffer_unlock_commit + * must not be called on the discarded event. This function + * will try to remove the event from the ring buffer completely + * if another event has not been written after it. + * + * Example use: + * + * if (some_condition) + * ring_buffer_discard_commit(buffer, event); + * else + * ring_buffer_unlock_commit(buffer, event); + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + /* * size is in bytes for each per CPU buffer. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 74a11808c28..f935bd5ec3e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -205,27 +205,6 @@ static void rb_event_set_padding(struct ring_buffer_event *event) event->time_delta = 0; } -/** - * ring_buffer_event_discard - discard an event in the ring buffer - * @buffer: the ring buffer - * @event: the event to discard - * - * Sometimes a event that is in the ring buffer needs to be ignored. - * This function lets the user discard an event in the ring buffer - * and then that event will not be read later. - * - * Note, it is up to the user to be careful with this, and protect - * against races. If the user discards an event that has been consumed - * it is possible that it could corrupt the ring buffer. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event) -{ - event->type = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; -} - static unsigned rb_event_data_length(struct ring_buffer_event *event) { @@ -1570,6 +1549,110 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); +/** + * ring_buffer_event_discard - discard any event in the ring buffer + * @event: the event to discard + * + * Sometimes a event that is in the ring buffer needs to be ignored. + * This function lets the user discard an event in the ring buffer + * and then that event will not be read later. + * + * Note, it is up to the user to be careful with this, and protect + * against races. If the user discards an event that has been consumed + * it is possible that it could corrupt the ring buffer. + */ +void ring_buffer_event_discard(struct ring_buffer_event *event) +{ + event->type = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} +EXPORT_SYMBOL_GPL(ring_buffer_event_discard); + +/** + * ring_buffer_commit_discard - discard an event that has not been committed + * @buffer: the ring buffer + * @event: non committed event to discard + * + * This is similar to ring_buffer_event_discard but must only be + * performed on an event that has not been committed yet. The difference + * is that this will also try to free the event from the ring buffer + * if another event has not been added behind it. + * + * If another event has been added behind it, it will set the event + * up as discarded, and perform the commit. + * + * If this function is called, do not call ring_buffer_unlock_commit on + * the event. + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long new_index, old_index; + struct buffer_page *bpage; + unsigned long index; + unsigned long addr; + int cpu; + + /* The event is discarded regardless */ + ring_buffer_event_discard(event); + + /* + * This must only be called if the event has not been + * committed yet. Thus we can assume that preemption + * is still disabled. + */ + RB_WARN_ON(buffer, !preempt_count()); + + cpu = smp_processor_id(); + cpu_buffer = buffer->buffers[cpu]; + + new_index = rb_event_index(event); + old_index = new_index + rb_event_length(event); + addr = (unsigned long)event; + addr &= PAGE_MASK; + + bpage = cpu_buffer->tail_page; + + if (bpage == (void *)addr && rb_page_write(bpage) == old_index) { + /* + * This is on the tail page. It is possible that + * a write could come in and move the tail page + * and write to the next page. That is fine + * because we just shorten what is on this page. + */ + index = local_cmpxchg(&bpage->write, old_index, new_index); + if (index == old_index) + goto out; + } + + /* + * The commit is still visible by the reader, so we + * must increment entries. + */ + cpu_buffer->entries++; + out: + /* + * If a write came in and pushed the tail page + * we still need to update the commit pointer + * if we were the commit. + */ + if (rb_is_commit(cpu_buffer, event)) + rb_set_commit_to_write(cpu_buffer); + + /* + * Only the last preempt count needs to restore preemption. + */ + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else + preempt_enable_no_resched_notrace(); + +} +EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); + /** * ring_buffer_write - write data to the buffer without reserving * @buffer: The ring buffer to write to. -- cgit v1.2.3-70-g09d2 From 5f77a88b3f8268b11940b51d2e03d26a663ceb90 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 8 Apr 2009 03:14:01 -0500 Subject: tracing/infrastructure: separate event tracer from event support Add a new config option, CONFIG_EVENT_TRACING that gets selected when CONFIG_TRACING is selected and adds everything needed by the stuff in trace_export - basically all the event tracing support needed by e.g. bprint, minus the actual events, which are only included if CONFIG_EVENT_TRACER is selected. So CONFIG_EVENT_TRACER can be used to turn on or off the generated events (what I think of as the 'event tracer'), while CONFIG_EVENT_TRACING turns on or off the base event tracing support used by both the event tracer and the other things such as bprint that can't be configured out. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <1239178441.10295.34.camel@tropicana> Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 2 +- kernel/trace/Kconfig | 4 ++++ kernel/trace/Makefile | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660fd449..7e9b1e9f711 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -61,7 +61,7 @@ #define BRANCH_PROFILE() #endif -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 23b96ebbf89..644606e899f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -48,6 +48,9 @@ config FTRACE_NMI_ENTER depends on HAVE_FTRACE_NMI_ENTER default y +config EVENT_TRACING + bool + config TRACING bool select DEBUG_FS @@ -56,6 +59,7 @@ config TRACING select TRACEPOINTS select NOP_TRACER select BINARY_PRINTF + select EVENT_TRACING # # Minimum requirements an architecture has to meet for us to diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2630f5121ec..3ad367e7c97 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -40,11 +40,11 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o -obj-$(CONFIG_EVENT_TRACER) += trace_events.o +obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o -obj-$(CONFIG_EVENT_TRACER) += trace_export.o +obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o -obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o +obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o libftrace-y := ftrace.o -- cgit v1.2.3-70-g09d2 From 6e837fb152410e571a81aaadbd9884f0bc46a55e Mon Sep 17 00:00:00 2001 From: Etienne Basset Date: Wed, 8 Apr 2009 20:39:40 +0200 Subject: smack: implement logging V3 This patch creates auditing functions usable by LSM to audit security events. It provides standard dumping of FS, NET, task etc ... events (code borrowed from SELinux) and provides 2 callbacks to define LSM specific auditing, which should be flexible enough to convert SELinux too. Signed-off-by: Etienne Basset Acked-by: Casey Schaufler cked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 111 +++++++++++++ security/lsm_audit.c | 386 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 497 insertions(+) create mode 100644 include/linux/lsm_audit.h create mode 100644 security/lsm_audit.c (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h new file mode 100644 index 00000000000..e461b2c3d71 --- /dev/null +++ b/include/linux/lsm_audit.h @@ -0,0 +1,111 @@ +/* + * Common LSM logging functions + * Heavily borrowed from selinux/avc.h + * + * Author : Etienne BASSET + * + * All credits to : Stephen Smalley, + * All BUGS to : Etienne BASSET + */ +#ifndef _LSM_COMMON_LOGGING_ +#define _LSM_COMMON_LOGGING_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Auxiliary data to use in generating the audit record. */ +struct common_audit_data { + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 + struct task_struct *tsk; + union { + struct { + struct path path; + struct inode *inode; + } fs; + struct { + int netif; + struct sock *sk; + u16 family; + __be16 dport; + __be16 sport; + union { + struct { + __be32 daddr; + __be32 saddr; + } v4; + struct { + struct in6_addr daddr; + struct in6_addr saddr; + } v6; + } fam; + } net; + int cap; + int ipc_id; + struct task_struct *tsk; +#ifdef CONFIG_KEYS + struct { + key_serial_t key; + char *key_desc; + } key_struct; +#endif + } u; + const char *function; + /* this union contains LSM specific data */ + union { + /* SMACK data */ + struct smack_audit_data { + char *subject; + char *object; + char *request; + int result; + } smack_audit_data; + /* SELinux data */ + struct { + u32 ssid; + u32 tsid; + u16 tclass; + u32 requested; + u32 audited; + struct av_decision *avd; + int result; + } selinux_audit_data; + } lsm_priv; + /* these callback will be implemented by a specific LSM */ + void (*lsm_pre_audit)(struct audit_buffer *, void *); + void (*lsm_post_audit)(struct audit_buffer *, void *); +}; + +#define v4info fam.v4 +#define v6info fam.v6 + +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +/* Initialize an LSM audit data structure. */ +#define COMMON_AUDIT_DATA_INIT(_d, _t) \ + { memset((_d), 0, sizeof(struct common_audit_data)); \ + (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + +void common_lsm_audit(struct common_audit_data *a); + +#endif diff --git a/security/lsm_audit.c b/security/lsm_audit.c new file mode 100644 index 00000000000..94b868494b3 --- /dev/null +++ b/security/lsm_audit.c @@ -0,0 +1,386 @@ +/* + * common LSM auditing functions + * + * Based on code written for SELinux by : + * Stephen Smalley, + * James Morris + * Author : Etienne Basset, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * ipv4_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int ret = 0; + struct iphdr *ih; + + ih = ip_hdr(skb); + if (ih == NULL) + return -EINVAL; + + ad->u.net.v4info.saddr = ih->saddr; + ad->u.net.v4info.daddr = ih->daddr; + + if (proto) + *proto = ih->protocol; + /* non initial fragment */ + if (ntohs(ih->frag_off) & IP_OFFSET) + return 0; + + switch (ih->protocol) { + case IPPROTO_TCP: { + struct tcphdr *th = tcp_hdr(skb); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr *uh = udp_hdr(skb); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr *dh = dccp_hdr(skb); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr *sh = sctp_hdr(skb); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * ipv6_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int offset, ret = 0; + struct ipv6hdr *ip6; + u8 nexthdr; + + ip6 = ipv6_hdr(skb); + if (ip6 == NULL) + return -EINVAL; + ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr); + ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr); + ret = 0; + /* IPv6 can have several extension header before the Transport header + * skip them */ + offset = skb_network_offset(skb); + offset += sizeof(*ip6); + nexthdr = ip6->nexthdr; + offset = ipv6_skip_exthdr(skb, offset, &nexthdr); + if (offset < 0) + return 0; + if (proto) + *proto = nexthdr; + switch (nexthdr) { + case IPPROTO_TCP: { + struct tcphdr _tcph, *th; + + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr _udph, *uh; + + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr _dccph, *dh; + + dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr _sctph, *sh; + + sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#endif + + +static inline void print_ipv6_addr(struct audit_buffer *ab, + struct in6_addr *addr, __be16 port, + char *name1, char *name2) +{ + if (!ipv6_addr_any(addr)) + audit_log_format(ab, " %s=%pI6", name1, addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, + __be16 port, char *name1, char *name2) +{ + if (addr) + audit_log_format(ab, " %s=%pI4", name1, &addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +/** + * dump_common_audit_data - helper to dump common audit data + * @a : common audit data + * + */ +static void dump_common_audit_data(struct audit_buffer *ab, + struct common_audit_data *a) +{ + struct inode *inode = NULL; + struct task_struct *tsk = current; + + if (a->tsk) + tsk = a->tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + + switch (a->type) { + case LSM_AUDIT_DATA_IPC: + audit_log_format(ab, " key=%d ", a->u.ipc_id); + break; + case LSM_AUDIT_DATA_CAP: + audit_log_format(ab, " capability=%d ", a->u.cap); + break; + case LSM_AUDIT_DATA_FS: + if (a->u.fs.path.dentry) { + struct dentry *dentry = a->u.fs.path.dentry; + if (a->u.fs.path.mnt) { + audit_log_d_path(ab, "path=", &a->u.fs.path); + } else { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + } + inode = dentry->d_inode; + } else if (a->u.fs.inode) { + struct dentry *dentry; + inode = a->u.fs.inode; + dentry = d_find_alias(inode); + if (dentry) { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + dput(dentry); + } + } + if (inode) + audit_log_format(ab, " dev=%s ino=%lu", + inode->i_sb->s_id, + inode->i_ino); + break; + case LSM_AUDIT_DATA_TASK: + tsk = a->u.tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + break; + case LSM_AUDIT_DATA_NET: + if (a->u.net.sk) { + struct sock *sk = a->u.net.sk; + struct unix_sock *u; + int len = 0; + char *p = NULL; + + switch (sk->sk_family) { + case AF_INET: { + struct inet_sock *inet = inet_sk(sk); + + print_ipv4_addr(ab, inet->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv4_addr(ab, inet->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_INET6: { + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *inet6 = inet6_sk(sk); + + print_ipv6_addr(ab, &inet6->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv6_addr(ab, &inet6->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_UNIX: + u = unix_sk(sk); + if (u->dentry) { + struct path path = { + .dentry = u->dentry, + .mnt = u->mnt + }; + audit_log_d_path(ab, "path=", &path); + break; + } + if (!u->addr) + break; + len = u->addr->len-sizeof(short); + p = &u->addr->name->sun_path[0]; + audit_log_format(ab, " path="); + if (*p) + audit_log_untrustedstring(ab, p); + else + audit_log_n_hex(ab, p, len); + break; + } + } + + switch (a->u.net.family) { + case AF_INET: + print_ipv4_addr(ab, a->u.net.v4info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv4_addr(ab, a->u.net.v4info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + case AF_INET6: + print_ipv6_addr(ab, &a->u.net.v6info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv6_addr(ab, &a->u.net.v6info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + } + if (a->u.net.netif > 0) { + struct net_device *dev; + + /* NOTE: we always use init's namespace */ + dev = dev_get_by_index(&init_net, a->u.net.netif); + if (dev) { + audit_log_format(ab, " netif=%s", dev->name); + dev_put(dev); + } + } + break; +#ifdef CONFIG_KEYS + case LSM_AUDIT_DATA_KEY: + audit_log_format(ab, " key_serial=%u", a->u.key_struct.key); + if (a->u.key_struct.key_desc) { + audit_log_format(ab, " key_desc="); + audit_log_untrustedstring(ab, a->u.key_struct.key_desc); + } + break; +#endif + } /* switch (a->type) */ +} + +/** + * common_lsm_audit - generic LSM auditing function + * @a: auxiliary audit data + * + * setup the audit buffer for common security information + * uses callback to print LSM specific information + */ +void common_lsm_audit(struct common_audit_data *a) +{ + struct audit_buffer *ab; + + if (a == NULL) + return; + /* we use GFP_ATOMIC so we won't sleep */ + ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); + + if (ab == NULL) + return; + + if (a->lsm_pre_audit) + a->lsm_pre_audit(ab, a); + + dump_common_audit_data(ab, a); + + if (a->lsm_post_audit) + a->lsm_post_audit(ab, a); + + audit_log_end(ab); +} -- cgit v1.2.3-70-g09d2 From 7ba5c840e64d4a967379f1ae3eca73278180b11d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2009 21:31:17 -0700 Subject: rcu: Add __rcu_pending tracing to hierarchical RCU Add tracing to __rcu_pending() to provide information on why RCU processing was kicked off. This is helpful for debugging hierarchical RCU, and might also be helpful in learning how hierarchical RCU operates. Located-by: Anton Blanchard Tested-by: Anton Blanchard Signed-off-by: Paul E. McKenney Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. McKenney" LKML-Reference: <1239683479943-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 9 ++++++- kernel/rcutree.c | 25 ++++++++++++++----- kernel/rcutree_trace.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 90 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5312b..5a5153806c4 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d2a372fb0b9..0dccfbba6d2 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) + if (rdp->qs_pending) { + rdp->n_rp_qs_pending++; return 1; + } /* Does this CPU have callbacks ready to invoke? */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (cpu_has_callbacks_ready_to_invoke(rdp)) { + rdp->n_rp_cb_ready++; return 1; + } /* Has RCU gone idle with this CPU needing another grace period? */ - if (cpu_needs_another_gp(rsp, rdp)) + if (cpu_needs_another_gp(rsp, rdp)) { + rdp->n_rp_cpu_needs_gp++; return 1; + } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + rdp->n_rp_gp_completed++; return 1; + } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + rdp->n_rp_gp_started++; return 1; + } /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + rdp->n_rp_need_fqs++; return 1; + } /* nothing to do */ + rdp->n_rp_need_nothing++; return 0; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b1875ba940..fe1dcdbf1ca 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = { .release = single_release, }; -static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) +{ + seq_printf(m, "%3d%cnp=%ld " + "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->n_rcu_pending, + rdp->n_rp_qs_pending, + rdp->n_rp_cb_ready, + rdp->n_rp_cpu_needs_gp, + rdp->n_rp_gp_completed, + rdp->n_rp_gp_started, + rdp->n_rp_need_fqs, + rdp->n_rp_need_nothing); +} + +static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +{ + int cpu; + struct rcu_data *rdp; + + for_each_possible_cpu(cpu) { + rdp = rsp->rda[cpu]; + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } +} + +static int show_rcu_pending(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_rcu_pendings(m, &rcu_bh_state); + return 0; +} + +static int rcu_pending_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcu_pending, NULL); +} + +static struct file_operations rcu_pending_fops = { + .owner = THIS_MODULE, + .open = rcu_pending_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; +static struct dentry *datadir; +static struct dentry *datadir_csv; +static struct dentry *gpdir; +static struct dentry *hierdir; +static struct dentry *rcu_pendingdir; + static int __init rcuclassic_trace_init(void) { rcudir = debugfs_create_dir("rcu", NULL); @@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void) NULL, &rcuhier_fops); if (!hierdir) goto free_out; + + rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir, + NULL, &rcu_pending_fops); + if (!rcu_pendingdir) + goto free_out; return 0; free_out: if (datadir) @@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void) debugfs_remove(datadir_csv); debugfs_remove(gpdir); debugfs_remove(hierdir); + debugfs_remove(rcu_pendingdir); debugfs_remove(rcudir); } -- cgit v1.2.3-70-g09d2 From 02bec490450836ebbd628e97ec03f10b57def8ce Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Mar 2009 12:24:35 +0100 Subject: ALSA: lx6464es - driver for the digigram lx6464es interface prototype of a driver for the digigram lx6464es 64 channel ethersound interface. Signed-off-by: Tim Blechmann Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 5 + sound/pci/Kconfig | 10 + sound/pci/Makefile | 1 + sound/pci/lx6464es/Makefile | 2 + sound/pci/lx6464es/lx6464es.c | 1152 ++++++++++++++++++++++++++++++++ sound/pci/lx6464es/lx6464es.h | 114 ++++ sound/pci/lx6464es/lx_core.c | 1442 +++++++++++++++++++++++++++++++++++++++++ sound/pci/lx6464es/lx_core.h | 242 +++++++ sound/pci/lx6464es/lx_defs.h | 376 +++++++++++ 9 files changed, 3344 insertions(+) create mode 100644 sound/pci/lx6464es/Makefile create mode 100644 sound/pci/lx6464es/lx6464es.c create mode 100644 sound/pci/lx6464es/lx6464es.h create mode 100644 sound/pci/lx6464es/lx_core.c create mode 100644 sound/pci/lx6464es/lx_core.h create mode 100644 sound/pci/lx6464es/lx_defs.h (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd57088..2b1a69598e7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1005,6 +1005,7 @@ #define PCI_DEVICE_ID_PLX_PCI200SYN 0x3196 #define PCI_DEVICE_ID_PLX_9030 0x9030 #define PCI_DEVICE_ID_PLX_9050 0x9050 +#define PCI_DEVICE_ID_PLX_9056 0x9056 #define PCI_DEVICE_ID_PLX_9080 0x9080 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001 @@ -1847,6 +1848,10 @@ #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108 +#define PCI_VENDOR_ID_DIGIGRAM 0x1369 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM 0xc001 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM 0xc002 + #define PCI_VENDOR_ID_KAWASAKI 0x136b #define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01 diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 93422e3a3f0..e912b70b6f9 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -622,6 +622,16 @@ config SND_KORG1212 To compile this driver as a module, choose M here: the module will be called snd-korg1212. +config SND_LX6464ES + tristate "Digigram LX6464ES" + select SND_PCM + help + Say Y here to include support for Digigram LX6464ES boards. + + To compile this driver as a module, choose M here: the module + will be called snd-lx6464es. + + config SND_MAESTRO3 tristate "ESS Allegro/Maestro3" select SND_AC97_CODEC diff --git a/sound/pci/Makefile b/sound/pci/Makefile index 65b25d221cd..7d83e084dcf 100644 --- a/sound/pci/Makefile +++ b/sound/pci/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_SND) += \ ca0106/ \ cs46xx/ \ cs5535audio/ \ + lx6464es/ \ echoaudio/ \ emu10k1/ \ hda/ \ diff --git a/sound/pci/lx6464es/Makefile b/sound/pci/lx6464es/Makefile new file mode 100644 index 00000000000..eb04a6c73d8 --- /dev/null +++ b/sound/pci/lx6464es/Makefile @@ -0,0 +1,2 @@ +snd-lx6464es-objs := lx6464es.o lx_core.o +obj-$(CONFIG_SND_LX6464ES) += snd-lx6464es.o diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c new file mode 100644 index 00000000000..7bc8b8caa99 --- /dev/null +++ b/sound/pci/lx6464es/lx6464es.c @@ -0,0 +1,1152 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * + * Copyright (c) 2008, 2009 Tim Blechmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "lx6464es.h" + +MODULE_AUTHOR("Tim Blechmann"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("digigram lx6464es"); +MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}"); + + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; + +static const char card_name[] = "LX6464ES"; + + +#define PCI_DEVICE_ID_PLX_LX6464ES PCI_DEVICE_ID_PLX_9056 + +static struct pci_device_id snd_lx6464es_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES), + .subvendor = PCI_VENDOR_ID_DIGIGRAM, + .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM + }, /* LX6464ES */ + { PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES), + .subvendor = PCI_VENDOR_ID_DIGIGRAM, + .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM + }, /* LX6464ES-CAE */ + { 0, }, +}; + +MODULE_DEVICE_TABLE(pci, snd_lx6464es_ids); + + + +/* PGO pour USERo dans le registre pci_0x06/loc_0xEC */ +#define CHIPSC_RESET_XILINX (1L<<16) + + +/* alsa callbacks */ +static struct snd_pcm_hardware lx_caps = { + .info = (SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_SYNC_START), + .formats = (SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S16_BE | + SNDRV_PCM_FMTBIT_S24_3LE | + SNDRV_PCM_FMTBIT_S24_3BE), + .rates = (SNDRV_PCM_RATE_CONTINUOUS | + SNDRV_PCM_RATE_8000_192000), + .rate_min = 8000, + .rate_max = 192000, + .channels_min = 2, + .channels_max = 64, + .buffer_bytes_max = 64*2*3*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER, + .period_bytes_min = (2*2*MICROBLAZE_IBL_MIN*2), + .period_bytes_max = (4*64*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER), + .periods_min = 2, + .periods_max = MAX_STREAM_BUFFER, +}; + +static int lx_set_granularity(struct lx6464es *chip, u32 gran); + + +static int lx_hardware_open(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + struct snd_pcm_runtime *runtime = substream->runtime; + int channels = runtime->channels; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_pcm_uframes_t period_size = runtime->period_size; + + snd_printd(LXP "allocating pipe for %d channels\n", channels); + err = lx_pipe_allocate(chip, 0, is_capture, channels); + if (err < 0) { + snd_printk(KERN_ERR LXP "allocating pipe failed\n"); + return err; + } + + err = lx_set_granularity(chip, period_size); + if (err < 0) { + snd_printk(KERN_ERR LXP "setting granularity to %ld failed\n", + period_size); + return err; + } + + return 0; +} + +static int lx_hardware_start(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + struct snd_pcm_runtime *runtime = substream->runtime; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "setting stream format\n"); + err = lx_stream_set_format(chip, runtime, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "setting stream format failed\n"); + return err; + } + + snd_printd(LXP "starting pipe\n"); + err = lx_pipe_start(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "starting pipe failed\n"); + return err; + } + + snd_printd(LXP "waiting for pipe to start\n"); + err = lx_pipe_wait_for_start(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "waiting for pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_hardware_stop(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "pausing pipe\n"); + err = lx_pipe_pause(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "pausing pipe failed\n"); + return err; + } + + snd_printd(LXP "waiting for pipe to become idle\n"); + err = lx_pipe_wait_for_idle(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "waiting for pipe failed\n"); + return err; + } + + snd_printd(LXP "stopping pipe\n"); + err = lx_pipe_stop(chip, 0, is_capture); + if (err < 0) { + snd_printk(LXP "stopping pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_hardware_close(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "releasing pipe\n"); + err = lx_pipe_release(chip, 0, is_capture); + if (err < 0) { + snd_printk(LXP "releasing pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_pcm_open(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + int err = 0; + int board_rate; + + snd_printdd("->lx_pcm_open\n"); + mutex_lock(&chip->setup_mutex); + + /* copy the struct snd_pcm_hardware struct */ + runtime->hw = lx_caps; + +#if 0 + /* buffer-size should better be multiple of period-size */ + err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not constrain periods\n"); + goto exit; + } +#endif + + /* the clock rate cannot be changed */ + board_rate = chip->board_sample_rate; + err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE, + board_rate, board_rate); + + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not constrain periods\n"); + goto exit; + } + + /* constrain period size */ + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + MICROBLAZE_IBL_MIN, + MICROBLAZE_IBL_MAX); + if (err < 0) { + snd_printk(KERN_WARNING LXP + "could not constrain period size\n"); + goto exit; + } + + snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 32); + + snd_pcm_set_sync(substream); + err = 0; + +exit: + runtime->private_data = chip; + + mutex_unlock(&chip->setup_mutex); + snd_printdd("<-lx_pcm_open, %d\n", err); + return err; +} + +static int lx_pcm_close(struct snd_pcm_substream *substream) +{ + int err = 0; + snd_printdd("->lx_pcm_close\n"); + return err; +} + +static snd_pcm_uframes_t lx_pcm_stream_pointer(struct snd_pcm_substream + *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + snd_pcm_uframes_t pos; + unsigned long flags; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + struct lx_stream *lx_stream = is_capture ? &chip->capture_stream : + &chip->playback_stream; + + snd_printdd("->lx_pcm_stream_pointer\n"); + + spin_lock_irqsave(&chip->lock, flags); + pos = lx_stream->frame_pos * substream->runtime->period_size; + spin_unlock_irqrestore(&chip->lock, flags); + + snd_printdd(LXP "stream_pointer at %ld\n", pos); + return pos; +} + +static int lx_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printdd("->lx_pcm_prepare\n"); + + mutex_lock(&chip->setup_mutex); + + if (chip->hardware_running[is_capture]) { + err = lx_hardware_stop(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to stop hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_close(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to close hardware. " + "Error code %d\n", err); + goto exit; + } + } + + snd_printd(LXP "opening hardware\n"); + err = lx_hardware_open(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to open hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_start(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to start hardware. " + "Error code %d\n", err); + goto exit; + } + + chip->hardware_running[is_capture] = 1; + + if (chip->board_sample_rate != substream->runtime->rate) { + if (!err) + chip->board_sample_rate = substream->runtime->rate; + } + +exit: + mutex_unlock(&chip->setup_mutex); + return err; +} + +static int lx_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params, int is_capture) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + + snd_printdd("->lx_pcm_hw_params\n"); + + mutex_lock(&chip->setup_mutex); + + /* set dma buffer */ + err = snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); + + if (is_capture) + chip->capture_stream.stream = substream; + else + chip->playback_stream.stream = substream; + + mutex_unlock(&chip->setup_mutex); + return err; +} + +static int lx_pcm_hw_params_playback(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + return lx_pcm_hw_params(substream, hw_params, 0); +} + +static int lx_pcm_hw_params_capture(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + return lx_pcm_hw_params(substream, hw_params, 1); +} + +static int lx_pcm_hw_free(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printdd("->lx_pcm_hw_free\n"); + mutex_lock(&chip->setup_mutex); + + if (chip->hardware_running[is_capture]) { + err = lx_hardware_stop(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to stop hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_close(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to close hardware. " + "Error code %d\n", err); + goto exit; + } + + chip->hardware_running[is_capture] = 0; + } + + err = snd_pcm_lib_free_pages(substream); + + if (is_capture) + chip->capture_stream.stream = 0; + else + chip->playback_stream.stream = 0; + +exit: + mutex_unlock(&chip->setup_mutex); + return err; +} + +static void lx_trigger_start(struct lx6464es *chip, struct lx_stream *lx_stream) +{ + struct snd_pcm_substream *substream = lx_stream->stream; + const int is_capture = lx_stream->is_capture; + + int err; + + const u32 channels = substream->runtime->channels; + const u32 bytes_per_frame = channels * 3; + const u32 period_size = substream->runtime->period_size; + const u32 periods = substream->runtime->periods; + const u32 period_bytes = period_size * bytes_per_frame; + + dma_addr_t buf = substream->dma_buffer.addr; + int i; + + u32 needed, freed; + u32 size_array[5]; + + for (i = 0; i != periods; ++i) { + u32 buffer_index = 0; + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, + size_array); + snd_printdd(LXP "starting: needed %d, freed %d\n", + needed, freed); + + err = lx_buffer_give(chip, 0, is_capture, period_bytes, + lower_32_bits(buf), upper_32_bits(buf), + &buffer_index); + + snd_printdd(LXP "starting: buffer index %x on %p (%d bytes)\n", + buffer_index, (void *)buf, period_bytes); + buf += period_bytes; + } + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array); + snd_printdd(LXP "starting: needed %d, freed %d\n", needed, freed); + + snd_printd(LXP "starting: starting stream\n"); + err = lx_stream_start(chip, 0, is_capture); + if (err < 0) + snd_printk(KERN_ERR LXP "couldn't start stream\n"); + else + lx_stream->status = LX_STREAM_STATUS_RUNNING; + + lx_stream->frame_pos = 0; +} + +static void lx_trigger_stop(struct lx6464es *chip, struct lx_stream *lx_stream) +{ + const int is_capture = lx_stream->is_capture; + int err; + + snd_printd(LXP "stopping: stopping stream\n"); + err = lx_stream_stop(chip, 0, is_capture); + if (err < 0) + snd_printk(KERN_ERR LXP "couldn't stop stream\n"); + else + lx_stream->status = LX_STREAM_STATUS_FREE; + +} + +static void lx_trigger_tasklet_dispatch_stream(struct lx6464es *chip, + struct lx_stream *lx_stream) +{ + switch (lx_stream->status) { + case LX_STREAM_STATUS_SCHEDULE_RUN: + lx_trigger_start(chip, lx_stream); + break; + + case LX_STREAM_STATUS_SCHEDULE_STOP: + lx_trigger_stop(chip, lx_stream); + break; + + default: + break; + } +} + +static void lx_trigger_tasklet(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + unsigned long flags; + + snd_printdd("->lx_trigger_tasklet\n"); + + spin_lock_irqsave(&chip->lock, flags); + lx_trigger_tasklet_dispatch_stream(chip, &chip->capture_stream); + lx_trigger_tasklet_dispatch_stream(chip, &chip->playback_stream); + spin_unlock_irqrestore(&chip->lock, flags); +} + +static int lx_pcm_trigger_dispatch(struct lx6464es *chip, + struct lx_stream *lx_stream, int cmd) +{ + int err = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + lx_stream->status = LX_STREAM_STATUS_SCHEDULE_RUN; + break; + + case SNDRV_PCM_TRIGGER_STOP: + lx_stream->status = LX_STREAM_STATUS_SCHEDULE_STOP; + break; + + default: + err = -EINVAL; + goto exit; + } + tasklet_schedule(&chip->trigger_tasklet); + +exit: + return err; +} + + +static int lx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + struct lx_stream *stream = is_capture ? &chip->capture_stream : + &chip->playback_stream; + + snd_printdd("->lx_pcm_trigger\n"); + + return lx_pcm_trigger_dispatch(chip, stream, cmd); +} + +static int snd_lx6464es_free(struct lx6464es *chip) +{ + snd_printdd("->snd_lx6464es_free\n"); + + lx_irq_disable(chip); + + if (chip->irq >= 0) + free_irq(chip->irq, chip); + + iounmap(chip->port_dsp_bar); + ioport_unmap(chip->port_plx_remapped); + + pci_release_regions(chip->pci); + pci_disable_device(chip->pci); + + kfree(chip); + + return 0; +} + +static int snd_lx6464es_dev_free(struct snd_device *device) +{ + return snd_lx6464es_free(device->device_data); +} + +/* reset the dsp during initialization */ +static int __devinit lx_init_xilinx_reset(struct lx6464es *chip) +{ + int i; + u32 plx_reg = lx_plx_reg_read(chip, ePLX_CHIPSC); + + snd_printdd("->lx_init_xilinx_reset\n"); + + /* activate reset of xilinx */ + plx_reg &= ~CHIPSC_RESET_XILINX; + + lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg); + msleep(1); + + lx_plx_reg_write(chip, ePLX_MBOX3, 0); + msleep(1); + + plx_reg |= CHIPSC_RESET_XILINX; + lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg); + + /* deactivate reset of xilinx */ + for (i = 0; i != 100; ++i) { + u32 reg_mbox3; + msleep(10); + reg_mbox3 = lx_plx_reg_read(chip, ePLX_MBOX3); + if (reg_mbox3) { + snd_printd(LXP "xilinx reset done\n"); + snd_printdd(LXP "xilinx took %d loops\n", i); + break; + } + } + + /* todo: add some error handling? */ + + /* clear mr */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + /* le xilinx ES peut ne pas etre encore pret, on attend. */ + msleep(600); + + return 0; +} + +static int __devinit lx_init_xilinx_test(struct lx6464es *chip) +{ + u32 reg; + + snd_printdd("->lx_init_xilinx_test\n"); + + /* TEST if we have access to Xilinx/MicroBlaze */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + reg = lx_dsp_reg_read(chip, eReg_CSM); + + if (reg) { + snd_printk(KERN_ERR LXP "Problem: Reg_CSM %x.\n", reg); + + /* PCI9056_SPACE0_REMAP */ + lx_plx_reg_write(chip, ePLX_PCICR, 1); + + reg = lx_dsp_reg_read(chip, eReg_CSM); + if (reg) { + snd_printk(KERN_ERR LXP "Error: Reg_CSM %x.\n", reg); + return -EAGAIN; /* seems to be appropriate */ + } + } + + snd_printd(LXP "Xilinx/MicroBlaze access test successful\n"); + + return 0; +} + +/* initialize ethersound */ +static int __devinit lx_init_ethersound_config(struct lx6464es *chip) +{ + int i; + u32 orig_conf_es = lx_dsp_reg_read(chip, eReg_CONFES); + + u32 default_conf_es = (64 << IOCR_OUTPUTS_OFFSET) | + (64 << IOCR_INPUTS_OFFSET) | + (FREQ_RATIO_SINGLE_MODE << FREQ_RATIO_OFFSET); + + u32 conf_es = (orig_conf_es & CONFES_READ_PART_MASK) + | (default_conf_es & CONFES_WRITE_PART_MASK); + + snd_printdd("->lx_init_ethersound\n"); + + chip->freq_ratio = FREQ_RATIO_SINGLE_MODE; + + /* + * write it to the card ! + * this actually kicks the ES xilinx, the first time since poweron. + * the MAC address in the Reg_ADMACESMSB Reg_ADMACESLSB registers + * is not ready before this is done, and the bit 2 in Reg_CSES is set. + * */ + lx_dsp_reg_write(chip, eReg_CONFES, conf_es); + + for (i = 0; i != 1000; ++i) { + if (lx_dsp_reg_read(chip, eReg_CSES) & 4) { + snd_printd(LXP "ethersound initialized after %dms\n", + i); + goto ethersound_initialized; + } + msleep(1); + } + snd_printk(KERN_WARNING LXP + "ethersound could not be initialized after %dms\n", i); + return -ETIMEDOUT; + + ethersound_initialized: + snd_printd(LXP "ethersound initialized\n"); + return 0; +} + +static int __devinit lx_init_get_version_features(struct lx6464es *chip) +{ + u32 dsp_version; + + int err; + + snd_printdd("->lx_init_get_version_features\n"); + + err = lx_dsp_get_version(chip, &dsp_version); + + if (err == 0) { + u32 freq; + + snd_printk(LXP "DSP version: V%02d.%02d #%d\n", + (dsp_version>>16) & 0xff, (dsp_version>>8) & 0xff, + dsp_version & 0xff); + + /* later: what firmware version do we expect? */ + + /* retrieve Play/Rec features */ + /* done here because we may have to handle alternate + * DSP files. */ + /* later */ + + /* init the EtherSound sample rate */ + err = lx_dsp_get_clock_frequency(chip, &freq); + if (err == 0) + chip->board_sample_rate = freq; + snd_printd(LXP "actual clock frequency %d\n", freq); + } else { + snd_printk(KERN_ERR LXP "DSP corrupted \n"); + err = -EAGAIN; + } + + return err; +} + +static int lx_set_granularity(struct lx6464es *chip, u32 gran) +{ + int err = 0; + u32 snapped_gran = MICROBLAZE_IBL_MIN; + + snd_printdd("->lx_set_granularity\n"); + + /* blocksize is a power of 2 */ + while ((snapped_gran < gran) && + (snapped_gran < MICROBLAZE_IBL_MAX)) { + snapped_gran *= 2; + } + + if (snapped_gran == chip->pcm_granularity) + return 0; + + err = lx_dsp_set_granularity(chip, snapped_gran); + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not set granularity\n"); + err = -EAGAIN; + } + + if (snapped_gran != gran) + snd_printk(LXP "snapped blocksize to %d\n", snapped_gran); + + snd_printd(LXP "set blocksize on board %d\n", snapped_gran); + chip->pcm_granularity = snapped_gran; + + return err; +} + +/* initialize and test the xilinx dsp chip */ +static int __devinit lx_init_dsp(struct lx6464es *chip) +{ + int err; + u8 mac_address[6]; + int i; + + snd_printdd("->lx_init_dsp\n"); + + snd_printd(LXP "initialize board\n"); + err = lx_init_xilinx_reset(chip); + if (err) + return err; + + snd_printd(LXP "testing board\n"); + err = lx_init_xilinx_test(chip); + if (err) + return err; + + snd_printd(LXP "initialize ethersound configuration\n"); + err = lx_init_ethersound_config(chip); + if (err) + return err; + + lx_irq_enable(chip); + + /** \todo the mac address should be ready by not, but it isn't, + * so we wait for it */ + for (i = 0; i != 1000; ++i) { + err = lx_dsp_get_mac(chip, mac_address); + if (err) + return err; + if (mac_address[0] || mac_address[1] || mac_address[2] || + mac_address[3] || mac_address[4] || mac_address[5]) + goto mac_ready; + msleep(1); + } + return -ETIMEDOUT; + +mac_ready: + snd_printd(LXP "mac address ready read after: %dms\n", i); + snd_printk(LXP "mac address: %02X.%02X.%02X.%02X.%02X.%02X\n", + mac_address[0], mac_address[1], mac_address[2], + mac_address[3], mac_address[4], mac_address[5]); + + err = lx_init_get_version_features(chip); + if (err) + return err; + + lx_set_granularity(chip, MICROBLAZE_IBL_DEFAULT); + + chip->playback_mute = 0; + + return err; +} + +static struct snd_pcm_ops lx_ops_playback = { + .open = lx_pcm_open, + .close = lx_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .prepare = lx_pcm_prepare, + .hw_params = lx_pcm_hw_params_playback, + .hw_free = lx_pcm_hw_free, + .trigger = lx_pcm_trigger, + .pointer = lx_pcm_stream_pointer, +}; + +static struct snd_pcm_ops lx_ops_capture = { + .open = lx_pcm_open, + .close = lx_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .prepare = lx_pcm_prepare, + .hw_params = lx_pcm_hw_params_capture, + .hw_free = lx_pcm_hw_free, + .trigger = lx_pcm_trigger, + .pointer = lx_pcm_stream_pointer, +}; + +static int __devinit lx_pcm_create(struct lx6464es *chip) +{ + int err; + struct snd_pcm *pcm; + + u32 size = 64 * /* channels */ + 3 * /* 24 bit samples */ + MAX_STREAM_BUFFER * /* periods */ + MICROBLAZE_IBL_MAX * /* frames per period */ + 2; /* duplex */ + + size = PAGE_ALIGN(size); + + /* hardcoded device name & channel count */ + err = snd_pcm_new(chip->card, (char *)card_name, 0, + 1, 1, &pcm); + + pcm->private_data = chip; + + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &lx_ops_playback); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &lx_ops_capture); + + pcm->info_flags = 0; + strcpy(pcm->name, card_name); + + err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + snd_dma_pci_data(chip->pci), + size, size); + if (err < 0) + return err; + + chip->pcm = pcm; + chip->capture_stream.is_capture = 1; + + return 0; +} + +static int lx_control_playback_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int lx_control_playback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct lx6464es *chip = snd_kcontrol_chip(kcontrol); + ucontrol->value.integer.value[0] = chip->playback_mute; + return 0; +} + +static int lx_control_playback_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct lx6464es *chip = snd_kcontrol_chip(kcontrol); + int changed = 0; + int current_value = chip->playback_mute; + + if (current_value != ucontrol->value.integer.value[0]) { + lx_level_unmute(chip, 0, !current_value); + chip->playback_mute = !current_value; + changed = 1; + } + return changed; +} + +static struct snd_kcontrol_new lx_control_playback_switch __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "PCM Playback Switch", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 0, + .info = lx_control_playback_info, + .get = lx_control_playback_get, + .put = lx_control_playback_put +}; + + + +static void lx_proc_levels_read(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + u32 levels[64]; + int err; + int i, j; + struct lx6464es *chip = entry->private_data; + + snd_iprintf(buffer, "capture levels:\n"); + err = lx_level_peaks(chip, 1, 64, levels); + if (err < 0) + return; + + for (i = 0; i != 8; ++i) { + for (j = 0; j != 8; ++j) + snd_iprintf(buffer, "%08x ", levels[i*8+j]); + snd_iprintf(buffer, "\n"); + } + + snd_iprintf(buffer, "\nplayback levels:\n"); + + err = lx_level_peaks(chip, 0, 64, levels); + if (err < 0) + return; + + for (i = 0; i != 8; ++i) { + for (j = 0; j != 8; ++j) + snd_iprintf(buffer, "%08x ", levels[i*8+j]); + snd_iprintf(buffer, "\n"); + } + + snd_iprintf(buffer, "\n"); +} + +static int __devinit lx_proc_create(struct snd_card *card, struct lx6464es *chip) +{ + struct snd_info_entry *entry; + int err = snd_card_proc_new(card, "levels", &entry); + if (err < 0) + return err; + + snd_info_set_text_ops(entry, chip, lx_proc_levels_read); + return 0; +} + + +static int __devinit snd_lx6464es_create(struct snd_card *card, + struct pci_dev *pci, + struct lx6464es **rchip) +{ + struct lx6464es *chip; + int err; + + static struct snd_device_ops ops = { + .dev_free = snd_lx6464es_dev_free, + }; + + snd_printdd("->snd_lx6464es_create\n"); + + *rchip = NULL; + + /* enable PCI device */ + err = pci_enable_device(pci); + if (err < 0) + return err; + + pci_set_master(pci); + + /* check if we can restrict PCI DMA transfers to 32 bits */ + err = pci_set_dma_mask(pci, DMA_32BIT_MASK); + if (err < 0) { + snd_printk(KERN_ERR "architecture does not support " + "32bit PCI busmaster DMA\n"); + pci_disable_device(pci); + return -ENXIO; + } + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) { + err = -ENOMEM; + goto alloc_failed; + } + + chip->card = card; + chip->pci = pci; + chip->irq = -1; + + /* initialize synchronization structs */ + spin_lock_init(&chip->lock); + spin_lock_init(&chip->msg_lock); + mutex_init(&chip->setup_mutex); + tasklet_init(&chip->trigger_tasklet, lx_trigger_tasklet, + (unsigned long)chip); + tasklet_init(&chip->tasklet_capture, lx_tasklet_capture, + (unsigned long)chip); + tasklet_init(&chip->tasklet_playback, lx_tasklet_playback, + (unsigned long)chip); + + /* request resources */ + err = pci_request_regions(pci, card_name); + if (err < 0) + goto request_regions_failed; + + /* plx port */ + chip->port_plx = pci_resource_start(pci, 1); + chip->port_plx_remapped = ioport_map(chip->port_plx, + pci_resource_len(pci, 1)); + + /* dsp port */ + chip->port_dsp_bar = pci_ioremap_bar(pci, 2); + + err = request_irq(pci->irq, lx_interrupt, IRQF_SHARED, + card_name, chip); + if (err) { + snd_printk(KERN_ERR LXP "unable to grab IRQ %d\n", pci->irq); + goto request_irq_failed; + } + chip->irq = pci->irq; + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) + goto device_new_failed; + + err = lx_init_dsp(chip); + if (err < 0) { + snd_printk(KERN_ERR LXP "error during DSP initialization\n"); + return err; + } + + err = lx_pcm_create(chip); + if (err < 0) + return err; + + err = lx_proc_create(card, chip); + if (err < 0) + return err; + + err = snd_ctl_add(card, snd_ctl_new1(&lx_control_playback_switch, + chip)); + if (err < 0) + return err; + + snd_card_set_dev(card, &pci->dev); + + *rchip = chip; + return 0; + +device_new_failed: + free_irq(pci->irq, chip); + +request_irq_failed: + pci_release_regions(pci); + +request_regions_failed: + kfree(chip); + +alloc_failed: + pci_disable_device(pci); + + return err; +} + +static int __devinit snd_lx6464es_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + static int dev; + struct snd_card *card; + struct lx6464es *chip; + int err; + + snd_printdd("->snd_lx6464es_probe\n"); + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) { + dev++; + return -ENOENT; + } + + card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); + if (card == NULL) + return -ENOMEM; + + err = snd_lx6464es_create(card, pci, &chip); + if (err < 0) { + snd_printk(KERN_ERR LXP "error during snd_lx6464es_create\n"); + goto out_free; + } + + strcpy(card->driver, "lx6464es"); + strcpy(card->shortname, "Digigram LX6464ES"); + sprintf(card->longname, "%s at 0x%lx, 0x%p, irq %i", + card->shortname, chip->port_plx, + chip->port_dsp_bar, chip->irq); + + err = snd_card_register(card); + if (err < 0) + goto out_free; + + snd_printdd(LXP "initialization successful\n"); + pci_set_drvdata(pci, card); + dev++; + return 0; + +out_free: + snd_card_free(card); + return err; + +} + +static void __devexit snd_lx6464es_remove(struct pci_dev *pci) +{ + snd_card_free(pci_get_drvdata(pci)); + pci_set_drvdata(pci, NULL); +} + + +static struct pci_driver driver = { + .name = "Digigram LX6464ES", + .id_table = snd_lx6464es_ids, + .probe = snd_lx6464es_probe, + .remove = __devexit_p(snd_lx6464es_remove), +}; + + +/* module initialization */ +static int __init mod_init(void) +{ + return pci_register_driver(&driver); +} + +static void __exit mod_exit(void) +{ + pci_unregister_driver(&driver); +} + +module_init(mod_init); +module_exit(mod_exit); diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h new file mode 100644 index 00000000000..012c010c8c8 --- /dev/null +++ b/sound/pci/lx6464es/lx6464es.h @@ -0,0 +1,114 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * + * Copyright (c) 2009 Tim Blechmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX6464ES_H +#define LX6464ES_H + +#include +#include + +#include +#include + +#include "lx_core.h" + +#define LXP "LX6464ES: " + +enum { + ES_cmd_free = 0, /* no command executing */ + ES_cmd_processing = 1, /* execution of a read/write command */ + ES_read_pending = 2, /* a asynchron read command is pending */ + ES_read_finishing = 3, /* a read command has finished waiting (set by + * Interrupt or CancelIrp) */ +}; + +enum lx_stream_status { + LX_STREAM_STATUS_FREE, +/* LX_STREAM_STATUS_OPEN, */ + LX_STREAM_STATUS_SCHEDULE_RUN, +/* LX_STREAM_STATUS_STARTED, */ + LX_STREAM_STATUS_RUNNING, + LX_STREAM_STATUS_SCHEDULE_STOP, +/* LX_STREAM_STATUS_STOPPED, */ +/* LX_STREAM_STATUS_PAUSED */ +}; + + +struct lx_stream { + struct snd_pcm_substream *stream; + snd_pcm_uframes_t frame_pos; + enum lx_stream_status status; /* free, open, running, draining + * pause */ + int is_capture:1; +}; + + +struct lx6464es { + struct snd_card *card; + struct pci_dev *pci; + int irq; + + spinlock_t lock; /* interrupt spinlock */ + struct mutex setup_mutex; /* mutex used in hw_params, open + * and close */ + + struct tasklet_struct trigger_tasklet; /* trigger tasklet */ + struct tasklet_struct tasklet_capture; + struct tasklet_struct tasklet_playback; + + /* ports */ + unsigned long port_plx; /* io port (size=256) */ + void __iomem *port_plx_remapped; /* remapped plx port */ + void __iomem *port_dsp_bar; /* memory port (32-bit, + * non-prefetchable, + * size=8K) */ + + /* messaging */ + spinlock_t msg_lock; /* message spinlock */ + atomic_t send_message_locked; + struct lx_rmh rmh; + + /* configuration */ + uint freq_ratio : 2; + uint playback_mute : 1; + uint hardware_running[2]; + u32 board_sample_rate; /* sample rate read from + * board */ + u32 sample_rate; /* our sample rate */ + u16 pcm_granularity; /* board blocksize */ + + /* dma */ + struct snd_dma_buffer capture_dma_buf; + struct snd_dma_buffer playback_dma_buf; + + /* pcm */ + struct snd_pcm *pcm; + + /* streams */ + struct lx_stream capture_stream; + struct lx_stream playback_stream; +}; + + +#endif /* LX6464ES_H */ diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c new file mode 100644 index 00000000000..a9f8f882b10 --- /dev/null +++ b/sound/pci/lx6464es/lx_core.c @@ -0,0 +1,1442 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * low-level interface + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* #define RMH_DEBUG 1 */ + +#include +#include +#include + +#include "lx6464es.h" +#include "lx_core.h" + +/* low-level register access */ + +static const unsigned long dsp_port_offsets[] = { + 0, + 0x400, + 0x401, + 0x402, + 0x403, + 0x404, + 0x405, + 0x406, + 0x407, + 0x408, + 0x409, + 0x40a, + 0x40b, + 0x40c, + + 0x410, + 0x411, + 0x412, + 0x413, + 0x414, + 0x415, + 0x416, + + 0x420, + 0x430, + 0x431, + 0x432, + 0x433, + 0x434, + 0x440 +}; + +static void __iomem *lx_dsp_register(struct lx6464es *chip, int port) +{ + void __iomem *base_address = chip->port_dsp_bar; + return base_address + dsp_port_offsets[port]*4; +} + +unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port) +{ + void __iomem *address = lx_dsp_register(chip, port); + return ioread32(address); +} + +void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len) +{ + void __iomem *address = lx_dsp_register(chip, port); + memcpy_fromio(data, address, len*sizeof(u32)); +} + + +void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data) +{ + void __iomem *address = lx_dsp_register(chip, port); + iowrite32(data, address); +} + +void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data, + u32 len) +{ + void __iomem *address = lx_dsp_register(chip, port); + memcpy_toio(address, data, len*sizeof(u32)); +} + + +static const unsigned long plx_port_offsets[] = { + 0x04, + 0x40, + 0x44, + 0x48, + 0x4c, + 0x50, + 0x54, + 0x58, + 0x5c, + 0x64, + 0x68, + 0x6C +}; + +static void __iomem *lx_plx_register(struct lx6464es *chip, int port) +{ + void __iomem *base_address = chip->port_plx_remapped; + return base_address + plx_port_offsets[port]; +} + +unsigned long lx_plx_reg_read(struct lx6464es *chip, int port) +{ + void __iomem *address = lx_plx_register(chip, port); + return ioread32(address); +} + +void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data) +{ + void __iomem *address = lx_plx_register(chip, port); + iowrite32(data, address); +} + +u32 lx_plx_mbox_read(struct lx6464es *chip, int mbox_nr) +{ + int index; + + switch (mbox_nr) { + case 1: + index = ePLX_MBOX1; break; + case 2: + index = ePLX_MBOX2; break; + case 3: + index = ePLX_MBOX3; break; + case 4: + index = ePLX_MBOX4; break; + case 5: + index = ePLX_MBOX5; break; + case 6: + index = ePLX_MBOX6; break; + case 7: + index = ePLX_MBOX7; break; + case 0: /* reserved for HF flags */ + snd_BUG(); + default: + return 0xdeadbeef; + } + + return lx_plx_reg_read(chip, index); +} + +int lx_plx_mbox_write(struct lx6464es *chip, int mbox_nr, u32 value) +{ + int index = -1; + + switch (mbox_nr) { + case 1: + index = ePLX_MBOX1; break; + case 3: + index = ePLX_MBOX3; break; + case 4: + index = ePLX_MBOX4; break; + case 5: + index = ePLX_MBOX5; break; + case 6: + index = ePLX_MBOX6; break; + case 7: + index = ePLX_MBOX7; break; + case 0: /* reserved for HF flags */ + case 2: /* reserved for Pipe States + * the DSP keeps an image of it */ + snd_BUG(); + return -EBADRQC; + } + + lx_plx_reg_write(chip, index, value); + return 0; +} + + +/* rmh */ + +#ifdef CONFIG_SND_DEBUG +#define CMD_NAME(a) a +#else +#define CMD_NAME(a) NULL +#endif + +#define Reg_CSM_MR 0x00000002 +#define Reg_CSM_MC 0x00000001 + +struct dsp_cmd_info { + u32 dcCodeOp; /* Op Code of the command (usually 1st 24-bits + * word).*/ + u16 dcCmdLength; /* Command length in words of 24 bits.*/ + u16 dcStatusType; /* Status type: 0 for fixed length, 1 for + * random. */ + u16 dcStatusLength; /* Status length (if fixed).*/ + char *dcOpName; +}; + +/* + Initialization and control data for the Microblaze interface + - OpCode: + the opcode field of the command set at the proper offset + - CmdLength + the number of command words + - StatusType + offset in the status registers: 0 means that the return value may be + different from 0, and must be read + - StatusLength + the number of status words (in addition to the return value) +*/ + +static struct dsp_cmd_info dsp_commands[] = +{ + { (CMD_00_INFO_DEBUG << OPCODE_OFFSET) , 1 /*custom*/ + , 1 , 0 /**/ , CMD_NAME("INFO_DEBUG") }, + { (CMD_01_GET_SYS_CFG << OPCODE_OFFSET) , 1 /**/ + , 1 , 2 /**/ , CMD_NAME("GET_SYS_CFG") }, + { (CMD_02_SET_GRANULARITY << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_GRANULARITY") }, + { (CMD_03_SET_TIMER_IRQ << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_TIMER_IRQ") }, + { (CMD_04_GET_EVENT << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /*up to 10*/ , CMD_NAME("GET_EVENT") }, + { (CMD_05_GET_PIPES << OPCODE_OFFSET) , 1 /**/ + , 1 , 2 /*up to 4*/ , CMD_NAME("GET_PIPES") }, + { (CMD_06_ALLOCATE_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /**/ , CMD_NAME("ALLOCATE_PIPE") }, + { (CMD_07_RELEASE_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /**/ , CMD_NAME("RELEASE_PIPE") }, + { (CMD_08_ASK_BUFFERS << OPCODE_OFFSET) , 1 /**/ + , 1 , MAX_STREAM_BUFFER , CMD_NAME("ASK_BUFFERS") }, + { (CMD_09_STOP_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /*up to 2*/ , CMD_NAME("STOP_PIPE") }, + { (CMD_0A_GET_PIPE_SPL_COUNT << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /*up to 2*/ , CMD_NAME("GET_PIPE_SPL_COUNT") }, + { (CMD_0B_TOGGLE_PIPE_STATE << OPCODE_OFFSET) , 1 /*up to 5*/ + , 1 , 0 /**/ , CMD_NAME("TOGGLE_PIPE_STATE") }, + { (CMD_0C_DEF_STREAM << OPCODE_OFFSET) , 1 /*up to 4*/ + , 1 , 0 /**/ , CMD_NAME("DEF_STREAM") }, + { (CMD_0D_SET_MUTE << OPCODE_OFFSET) , 3 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_MUTE") }, + { (CMD_0E_GET_STREAM_SPL_COUNT << OPCODE_OFFSET) , 1/**/ + , 1 , 2 /**/ , CMD_NAME("GET_STREAM_SPL_COUNT") }, + { (CMD_0F_UPDATE_BUFFER << OPCODE_OFFSET) , 3 /*up to 4*/ + , 0 , 1 /**/ , CMD_NAME("UPDATE_BUFFER") }, + { (CMD_10_GET_BUFFER << OPCODE_OFFSET) , 1 /**/ + , 1 , 4 /**/ , CMD_NAME("GET_BUFFER") }, + { (CMD_11_CANCEL_BUFFER << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /*up to 4*/ , CMD_NAME("CANCEL_BUFFER") }, + { (CMD_12_GET_PEAK << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /**/ , CMD_NAME("GET_PEAK") }, + { (CMD_13_SET_STREAM_STATE << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_STREAM_STATE") }, +}; + +static void lx_message_init(struct lx_rmh *rmh, enum cmd_mb_opcodes cmd) +{ + snd_BUG_ON(cmd >= CMD_14_INVALID); + + rmh->cmd[0] = dsp_commands[cmd].dcCodeOp; + rmh->cmd_len = dsp_commands[cmd].dcCmdLength; + rmh->stat_len = dsp_commands[cmd].dcStatusLength; + rmh->dsp_stat = dsp_commands[cmd].dcStatusType; + rmh->cmd_idx = cmd; + memset(&rmh->cmd[1], 0, (REG_CRM_NUMBER - 1) * sizeof(u32)); + +#ifdef CONFIG_SND_DEBUG + memset(rmh->stat, 0, REG_CRM_NUMBER * sizeof(u32)); +#endif +#ifdef RMH_DEBUG + rmh->cmd_idx = cmd; +#endif +} + +#ifdef RMH_DEBUG +#define LXRMH "lx6464es rmh: " +static void lx_message_dump(struct lx_rmh *rmh) +{ + u8 idx = rmh->cmd_idx; + int i; + + snd_printk(LXRMH "command %s\n", dsp_commands[idx].dcOpName); + + for (i = 0; i != rmh->cmd_len; ++i) + snd_printk(LXRMH "\tcmd[%d] %08x\n", i, rmh->cmd[i]); + + for (i = 0; i != rmh->stat_len; ++i) + snd_printk(LXRMH "\tstat[%d]: %08x\n", i, rmh->stat[i]); + snd_printk("\n"); +} +#else +static inline void lx_message_dump(struct lx_rmh *rmh) +{} +#endif + + + +/* sleep 500 - 100 = 400 times 100us -> the timeout is >= 40 ms */ +#define XILINX_TIMEOUT_MS 40 +#define XILINX_POLL_NO_SLEEP 100 +#define XILINX_POLL_ITERATIONS 150 + +static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh) +{ + u32 reg = ED_DSP_TIMED_OUT; + int dwloop; + int answer_received; + + if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) { + snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg); + return -EBUSY; + } + + /* write command */ + lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len); + + snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0); + atomic_set(&chip->send_message_locked, 1); + + /* MicoBlaze gogogo */ + lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC); + + /* wait for interrupt to answer */ + for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) { + answer_received = atomic_read(&chip->send_message_locked); + if (answer_received == 0) + break; + msleep(1); + } + + if (answer_received == 0) { + /* in Debug mode verify Reg_CSM_MR */ + snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)); + + /* command finished, read status */ + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + } else { + int i; + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! " + "Interrupts disabled?\n"); + + /* attente bit Reg_CSM_MR */ + for (i = 0; i != XILINX_POLL_ITERATIONS; i++) { + if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) { + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + goto polling_successful; + } + + if (i > XILINX_POLL_NO_SLEEP) + msleep(1); + } + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! " + "polling failed\n"); + +polling_successful: + atomic_set(&chip->send_message_locked, 0); + } + + if ((reg & ERROR_VALUE) == 0) { + /* read response */ + if (rmh->stat_len) { + snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1)); + + lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat, + rmh->stat_len); + } + } else + snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n", + reg); + + /* clear Reg_CSM_MR */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + switch (reg) { + case ED_DSP_TIMED_OUT: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n"); + return -ETIMEDOUT; + + case ED_DSP_CRASHED: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n"); + return -EAGAIN; + } + + lx_message_dump(rmh); + return 0; +} + +static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh) +{ + u32 reg = ED_DSP_TIMED_OUT; + int dwloop; + + if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) { + snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg); + return -EBUSY; + } + + /* write command */ + lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len); + + /* MicoBlaze gogogo */ + lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC); + + /* wait for interrupt to answer */ + for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) { + if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) { + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + goto polling_successful; + } else + udelay(1); + } + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send_atomic! " + "polling failed\n"); + +polling_successful: + if ((reg & ERROR_VALUE) == 0) { + /* read response */ + if (rmh->stat_len) { + snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1)); + lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat, + rmh->stat_len); + } + } else + snd_printk(LXP "rmh error: %08x\n", reg); + + /* clear Reg_CSM_MR */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + switch (reg) { + case ED_DSP_TIMED_OUT: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n"); + return -ETIMEDOUT; + + case ED_DSP_CRASHED: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n"); + return -EAGAIN; + } + + lx_message_dump(rmh); + + return reg; +} + + +/* low-level dsp access */ +int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version) +{ + u16 ret; + unsigned long flags; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG); + ret = lx_message_send_atomic(chip, &chip->rmh); + + *rdsp_version = chip->rmh.stat[1]; + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq) +{ + u16 ret = 0; + unsigned long flags; + u32 freq_raw = 0; + u32 freq = 0; + u32 frequency = 0; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG); + ret = lx_message_send_atomic(chip, &chip->rmh); + + if (ret == 0) { + freq_raw = chip->rmh.stat[0] >> FREQ_FIELD_OFFSET; + freq = freq_raw & XES_FREQ_COUNT8_MASK; + + if ((freq < XES_FREQ_COUNT8_48_MAX) || + (freq > XES_FREQ_COUNT8_44_MIN)) + frequency = 0; /* unknown */ + else if (freq >= XES_FREQ_COUNT8_44_MAX) + frequency = 44100; + else + frequency = 48000; + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + + *rfreq = frequency * chip->freq_ratio; + + return ret; +} + +int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address) +{ + u32 macmsb, maclsb; + + macmsb = lx_dsp_reg_read(chip, eReg_ADMACESMSB) & 0x00FFFFFF; + maclsb = lx_dsp_reg_read(chip, eReg_ADMACESLSB) & 0x00FFFFFF; + + /* todo: endianess handling */ + mac_address[5] = ((u8 *)(&maclsb))[0]; + mac_address[4] = ((u8 *)(&maclsb))[1]; + mac_address[3] = ((u8 *)(&maclsb))[2]; + mac_address[2] = ((u8 *)(&macmsb))[0]; + mac_address[1] = ((u8 *)(&macmsb))[1]; + mac_address[0] = ((u8 *)(&macmsb))[2]; + + return 0; +} + + +int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_02_SET_GRANULARITY); + chip->rmh.cmd[0] |= gran; + + ret = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_04_GET_EVENT); + chip->rmh.stat_len = 9; /* we don't necessarily need the full length */ + + ret = lx_message_send_atomic(chip, &chip->rmh); + + if (!ret) + memcpy(data, chip->rmh.stat, chip->rmh.stat_len * sizeof(u32)); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +#define CSES_TIMEOUT 100 /* microseconds */ +#define CSES_CE 0x0001 +#define CSES_BROADCAST 0x0002 +#define CSES_UPDATE_LDSV 0x0004 + +int lx_dsp_es_check_pipeline(struct lx6464es *chip) +{ + int i; + + for (i = 0; i != CSES_TIMEOUT; ++i) { + /* + * le bit CSES_UPDATE_LDSV est à 1 dés que le macprog + * est pret. il re-passe à 0 lorsque le premier read a + * été fait. pour l'instant on retire le test car ce bit + * passe a 1 environ 200 à 400 ms aprés que le registre + * confES à été écrit (kick du xilinx ES). + * + * On ne teste que le bit CE. + * */ + + u32 cses = lx_dsp_reg_read(chip, eReg_CSES); + + if ((cses & CSES_CE) == 0) + return 0; + + udelay(1); + } + + return -ETIMEDOUT; +} + + +#define PIPE_INFO_TO_CMD(capture, pipe) \ + ((u32)((u32)(pipe) | ((capture) ? ID_IS_CAPTURE : 0L)) << ID_OFFSET) + + + +/* low-level pipe handling */ +int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture, + int channels) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_06_ALLOCATE_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= channels; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + if (err != 0) + snd_printk(KERN_ERR "lx6464es: could not allocate pipe\n"); + + return err; +} + +int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_07_RELEASE_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_needed, u32 *r_freed, u32 *size_array) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + +#ifdef CONFIG_SND_DEBUG + if (size_array) + memset(size_array, 0, sizeof(u32)*MAX_STREAM_BUFFER); +#endif + + *r_needed = 0; + *r_freed = 0; + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_08_ASK_BUFFERS); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (!err) { + int i; + for (i = 0; i < MAX_STREAM_BUFFER; ++i) { + u32 stat = chip->rmh.stat[i]; + if (stat & (BF_EOB << BUFF_FLAGS_OFFSET)) { + /* finished */ + *r_freed += 1; + if (size_array) + size_array[i] = stat & MASK_DATA_SIZE; + } else if ((stat & (BF_VALID << BUFF_FLAGS_OFFSET)) + == 0) + /* free */ + *r_needed += 1; + } + +#if 0 + snd_printdd(LXP "CMD_08_ASK_BUFFERS: needed %d, freed %d\n", + *r_needed, *r_freed); + for (i = 0; i < MAX_STREAM_BUFFER; ++i) { + for (i = 0; i != chip->rmh.stat_len; ++i) + snd_printdd(" stat[%d]: %x, %x\n", i, + chip->rmh.stat[i], + chip->rmh.stat[i] & MASK_DATA_SIZE); + } +#endif + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_09_STOP_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static int lx_pipe_toggle_state(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0B_TOGGLE_PIPE_STATE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + + err = lx_pipe_wait_for_idle(chip, pipe, is_capture); + if (err < 0) + return err; + + err = lx_pipe_toggle_state(chip, pipe, is_capture); + + return err; +} + +int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err = 0; + + err = lx_pipe_wait_for_start(chip, pipe, is_capture); + if (err < 0) + return err; + + err = lx_pipe_toggle_state(chip, pipe, is_capture); + + return err; +} + + +int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *rsample_count) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.stat_len = 2; /* need all words here! */ + + err = lx_message_send_atomic(chip, &chip->rmh); /* don't sleep! */ + + if (err != 0) + snd_printk(KERN_ERR + "lx6464es: could not query pipe's sample count\n"); + else { + *rsample_count = ((u64)(chip->rmh.stat[0] & MASK_SPL_COUNT_HI) + << 24) /* hi part */ + + chip->rmh.stat[1]; /* lo part */ + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err != 0) + snd_printk(KERN_ERR "lx6464es: could not query pipe's state\n"); + else + *rstate = (chip->rmh.stat[0] >> PSTATE_OFFSET) & 0x0F; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static int lx_pipe_wait_for_state(struct lx6464es *chip, u32 pipe, + int is_capture, u16 state) +{ + int i; + + /* max 2*PCMOnlyGranularity = 2*1024 at 44100 = < 50 ms: + * timeout 50 ms */ + for (i = 0; i != 50; ++i) { + u16 current_state; + int err = lx_pipe_state(chip, pipe, is_capture, ¤t_state); + + if (err < 0) + return err; + + if (current_state == state) + return 0; + + mdelay(1); + } + + return -ETIMEDOUT; +} + +int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture) +{ + return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_RUN); +} + +int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture) +{ + return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_IDLE); +} + +/* low-level stream handling */ +int lx_stream_set_state(struct lx6464es *chip, u32 pipe, + int is_capture, enum stream_state_t state) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_13_SET_STREAM_STATE); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= state; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime, + u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + u32 channels = runtime->channels; + + if (runtime->channels != channels) + snd_printk(KERN_ERR LXP "channel count mismatch: %d vs %d", + runtime->channels, channels); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0C_DEF_STREAM); + + chip->rmh.cmd[0] |= pipe_cmd; + + if (runtime->sample_bits == 16) + /* 16 bit format */ + chip->rmh.cmd[0] |= (STREAM_FMT_16b << STREAM_FMT_OFFSET); + + if (snd_pcm_format_little_endian(runtime->format)) + /* little endian/intel format */ + chip->rmh.cmd[0] |= (STREAM_FMT_intel << STREAM_FMT_OFFSET); + + chip->rmh.cmd[0] |= channels-1; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture, + int *rstate) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + *rstate = (chip->rmh.stat[0] & SF_START) ? START_STATE : PAUSE_STATE; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *r_bytepos) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + *r_bytepos = ((u64) (chip->rmh.stat[0] & MASK_SPL_COUNT_HI) + << 32) /* hi part */ + + chip->rmh.stat[1]; /* lo part */ + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +/* low-level buffer handling */ +int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi, + u32 *r_buffer_index) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0F_UPDATE_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= BF_NOTIFY_EOB; /* request interrupt notification */ + + /* todo: pause request, circular buffer */ + + chip->rmh.cmd[1] = buffer_size & MASK_DATA_SIZE; + chip->rmh.cmd[2] = buf_address_lo; + + if (buf_address_hi) { + chip->rmh.cmd_len = 4; + chip->rmh.cmd[3] = buf_address_hi; + chip->rmh.cmd[0] |= BF_64BITS_ADR; + } + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) { + *r_buffer_index = chip->rmh.stat[0]; + goto done; + } + + if (err == EB_RBUFFERS_TABLE_OVERFLOW) + snd_printk(LXP "lx_buffer_give EB_RBUFFERS_TABLE_OVERFLOW\n"); + + if (err == EB_INVALID_STREAM) + snd_printk(LXP "lx_buffer_give EB_INVALID_STREAM\n"); + + if (err == EB_CMD_REFUSED) + snd_printk(LXP "lx_buffer_give EB_CMD_REFUSED\n"); + + done: + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_buffer_size) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= MASK_BUFFER_ID; /* ask for the current buffer: the + * microblaze will seek for it */ + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) + *r_buffer_size = chip->rmh.stat[0] & MASK_DATA_SIZE; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_index) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= buffer_index; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +/* low-level gain/peak handling + * + * \todo: can we unmute capture/playback channels independently? + * + * */ +int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute) +{ + int err; + unsigned long flags; + + /* bit set to 1: channel muted */ + u64 mute_mask = unmute ? 0 : 0xFFFFFFFFFFFFFFFFLLU; + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0D_SET_MUTE); + + chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, 0); + + chip->rmh.cmd[1] = (u32)(mute_mask >> (u64)32); /* hi part */ + chip->rmh.cmd[2] = (u32)(mute_mask & (u64)0xFFFFFFFF); /* lo part */ + + snd_printk("mute %x %x %x\n", chip->rmh.cmd[0], chip->rmh.cmd[1], + chip->rmh.cmd[2]); + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static u32 peak_map[] = { + 0x00000109, /* -90.308dB */ + 0x0000083B, /* -72.247dB */ + 0x000020C4, /* -60.205dB */ + 0x00008273, /* -48.030dB */ + 0x00020756, /* -36.005dB */ + 0x00040C37, /* -30.001dB */ + 0x00081385, /* -24.002dB */ + 0x00101D3F, /* -18.000dB */ + 0x0016C310, /* -15.000dB */ + 0x002026F2, /* -12.001dB */ + 0x002D6A86, /* -9.000dB */ + 0x004026E6, /* -6.004dB */ + 0x005A9DF6, /* -3.000dB */ + 0x0065AC8B, /* -2.000dB */ + 0x00721481, /* -1.000dB */ + 0x007FFFFF, /* FS */ +}; + +int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels, + u32 *r_levels) +{ + int err = 0; + unsigned long flags; + int i; + spin_lock_irqsave(&chip->msg_lock, flags); + + for (i = 0; i < channels; i += 4) { + u32 s0, s1, s2, s3; + + lx_message_init(&chip->rmh, CMD_12_GET_PEAK); + chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, i); + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) { + s0 = peak_map[chip->rmh.stat[0] & 0x0F]; + s1 = peak_map[(chip->rmh.stat[0] >> 4) & 0xf]; + s2 = peak_map[(chip->rmh.stat[0] >> 8) & 0xf]; + s3 = peak_map[(chip->rmh.stat[0] >> 12) & 0xf]; + } else + s0 = s1 = s2 = s3 = 0; + + r_levels[0] = s0; + r_levels[1] = s1; + r_levels[2] = s2; + r_levels[3] = s3; + + r_levels += 4; + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +/* interrupt handling */ +#define PCX_IRQ_NONE 0 +#define IRQCS_ACTIVE_PCIDB 0x00002000L /* Bit nø 13 */ +#define IRQCS_ENABLE_PCIIRQ 0x00000100L /* Bit nø 08 */ +#define IRQCS_ENABLE_PCIDB 0x00000200L /* Bit nø 09 */ + +static u32 lx_interrupt_test_ack(struct lx6464es *chip) +{ + u32 irqcs = lx_plx_reg_read(chip, ePLX_IRQCS); + + /* Test if PCI Doorbell interrupt is active */ + if (irqcs & IRQCS_ACTIVE_PCIDB) { + u32 temp; + irqcs = PCX_IRQ_NONE; + + while ((temp = lx_plx_reg_read(chip, ePLX_L2PCIDB))) { + /* RAZ interrupt */ + irqcs |= temp; + lx_plx_reg_write(chip, ePLX_L2PCIDB, temp); + } + + return irqcs; + } + return PCX_IRQ_NONE; +} + +static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc, + int *r_async_pending, int *r_async_escmd) +{ + u32 irq_async; + u32 irqsrc = lx_interrupt_test_ack(chip); + + if (irqsrc == PCX_IRQ_NONE) + return 0; + + *r_irqsrc = irqsrc; + + irq_async = irqsrc & MASK_SYS_ASYNC_EVENTS; /* + EtherSound response + * (set by xilinx) + EOB */ + + if (irq_async & MASK_SYS_STATUS_ESA) { + irq_async &= ~MASK_SYS_STATUS_ESA; + *r_async_escmd = 1; + } + + if (irqsrc & MASK_SYS_STATUS_CMD_DONE) + /* xilinx command notification */ + atomic_set(&chip->send_message_locked, 0); + + if (irq_async) { + /* snd_printd("interrupt: async event pending\n"); */ + *r_async_pending = 1; + } + + return 1; +} + +static int lx_interrupt_handle_async_events(struct lx6464es *chip, u32 irqsrc, + int *r_freq_changed, + u64 *r_notified_in_pipe_mask, + u64 *r_notified_out_pipe_mask) +{ + int err; + u32 stat[9]; /* answer from CMD_04_GET_EVENT */ + + /* On peut optimiser pour ne pas lire les evenements vides + * les mots de réponse sont dans l'ordre suivant : + * Stat[0] mot de status général + * Stat[1] fin de buffer OUT pF + * Stat[2] fin de buffer OUT pf + * Stat[3] fin de buffer IN pF + * Stat[4] fin de buffer IN pf + * Stat[5] underrun poid fort + * Stat[6] underrun poid faible + * Stat[7] overrun poid fort + * Stat[8] overrun poid faible + * */ + + u64 orun_mask; + u64 urun_mask; +#if 0 + int has_underrun = (irqsrc & MASK_SYS_STATUS_URUN) ? 1 : 0; + int has_overrun = (irqsrc & MASK_SYS_STATUS_ORUN) ? 1 : 0; +#endif + int eb_pending_out = (irqsrc & MASK_SYS_STATUS_EOBO) ? 1 : 0; + int eb_pending_in = (irqsrc & MASK_SYS_STATUS_EOBI) ? 1 : 0; + + *r_freq_changed = (irqsrc & MASK_SYS_STATUS_FREQ) ? 1 : 0; + + err = lx_dsp_read_async_events(chip, stat); + if (err < 0) + return err; + + if (eb_pending_in) { + *r_notified_in_pipe_mask = ((u64)stat[3] << 32) + + stat[4]; + snd_printdd(LXP "interrupt: EOBI pending %llx\n", + *r_notified_in_pipe_mask); + } + if (eb_pending_out) { + *r_notified_out_pipe_mask = ((u64)stat[1] << 32) + + stat[2]; + snd_printdd(LXP "interrupt: EOBO pending %llx\n", + *r_notified_out_pipe_mask); + } + + orun_mask = ((u64)stat[7] << 32) + stat[8]; + urun_mask = ((u64)stat[5] << 32) + stat[6]; + + /* todo: handle xrun notification */ + + return err; +} + +static int lx_interrupt_request_new_buffer(struct lx6464es *chip, + struct lx_stream *lx_stream) +{ + struct snd_pcm_substream *substream = lx_stream->stream; + int is_capture = lx_stream->is_capture; + int err; + unsigned long flags; + + const u32 channels = substream->runtime->channels; + const u32 bytes_per_frame = channels * 3; + const u32 period_size = substream->runtime->period_size; + const u32 period_bytes = period_size * bytes_per_frame; + const u32 pos = lx_stream->frame_pos; + const u32 next_pos = ((pos+1) == substream->runtime->periods) ? + 0 : pos + 1; + + dma_addr_t buf = substream->dma_buffer.addr + pos * period_bytes; + u32 buf_hi = 0; + u32 buf_lo = 0; + u32 buffer_index = 0; + + u32 needed, freed; + u32 size_array[MAX_STREAM_BUFFER]; + + snd_printdd("->lx_interrupt_request_new_buffer\n"); + + spin_lock_irqsave(&chip->lock, flags); + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array); + snd_printdd(LXP "interrupt: needed %d, freed %d\n", needed, freed); + + unpack_pointer(buf, &buf_lo, &buf_hi); + err = lx_buffer_give(chip, 0, is_capture, period_bytes, buf_lo, buf_hi, + &buffer_index); + snd_printdd(LXP "interrupt: gave buffer index %x on %p (%d bytes)\n", + buffer_index, (void *)buf, period_bytes); + + lx_stream->frame_pos = next_pos; + spin_unlock_irqrestore(&chip->lock, flags); + + return err; +} + +void lx_tasklet_playback(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + struct lx_stream *lx_stream = &chip->playback_stream; + int err; + + snd_printdd("->lx_tasklet_playback\n"); + + err = lx_interrupt_request_new_buffer(chip, lx_stream); + if (err < 0) + snd_printk(KERN_ERR LXP + "cannot request new buffer for playback\n"); + + snd_pcm_period_elapsed(lx_stream->stream); +} + +void lx_tasklet_capture(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + struct lx_stream *lx_stream = &chip->capture_stream; + int err; + + snd_printdd("->lx_tasklet_capture\n"); + err = lx_interrupt_request_new_buffer(chip, lx_stream); + if (err < 0) + snd_printk(KERN_ERR LXP + "cannot request new buffer for capture\n"); + + snd_pcm_period_elapsed(lx_stream->stream); +} + + + +static int lx_interrupt_handle_audio_transfer(struct lx6464es *chip, + u64 notified_in_pipe_mask, + u64 notified_out_pipe_mask) +{ + int err = 0; + + if (notified_in_pipe_mask) { + snd_printdd(LXP "requesting audio transfer for capture\n"); + tasklet_hi_schedule(&chip->tasklet_capture); + } + + if (notified_out_pipe_mask) { + snd_printdd(LXP "requesting audio transfer for playback\n"); + tasklet_hi_schedule(&chip->tasklet_playback); + } + + return err; +} + + +irqreturn_t lx_interrupt(int irq, void *dev_id) +{ + struct lx6464es *chip = dev_id; + int async_pending, async_escmd; + u32 irqsrc; + + spin_lock(&chip->lock); + + snd_printdd("**************************************************\n"); + + if (!lx_interrupt_ack(chip, &irqsrc, &async_pending, &async_escmd)) { + spin_unlock(&chip->lock); + snd_printdd("IRQ_NONE\n"); + return IRQ_NONE; /* this device did not cause the interrupt */ + } + + if (irqsrc & MASK_SYS_STATUS_CMD_DONE) + goto exit; + +#if 0 + if (irqsrc & MASK_SYS_STATUS_EOBI) + snd_printdd(LXP "interrupt: EOBI\n"); + + if (irqsrc & MASK_SYS_STATUS_EOBO) + snd_printdd(LXP "interrupt: EOBO\n"); + + if (irqsrc & MASK_SYS_STATUS_URUN) + snd_printdd(LXP "interrupt: URUN\n"); + + if (irqsrc & MASK_SYS_STATUS_ORUN) + snd_printdd(LXP "interrupt: ORUN\n"); +#endif + + if (async_pending) { + u64 notified_in_pipe_mask = 0; + u64 notified_out_pipe_mask = 0; + int freq_changed; + int err; + + /* handle async events */ + err = lx_interrupt_handle_async_events(chip, irqsrc, + &freq_changed, + ¬ified_in_pipe_mask, + ¬ified_out_pipe_mask); + if (err) + snd_printk(KERN_ERR LXP + "error handling async events\n"); + + err = lx_interrupt_handle_audio_transfer(chip, + notified_in_pipe_mask, + notified_out_pipe_mask + ); + if (err) + snd_printk(KERN_ERR LXP + "error during audio transfer\n"); + } + + if (async_escmd) { +#if 0 + /* backdoor for ethersound commands + * + * for now, we do not need this + * + * */ + + snd_printdd("lx6464es: interrupt requests escmd handling\n"); +#endif + } + +exit: + spin_unlock(&chip->lock); + return IRQ_HANDLED; /* this device caused the interrupt */ +} + + +static void lx_irq_set(struct lx6464es *chip, int enable) +{ + u32 reg = lx_plx_reg_read(chip, ePLX_IRQCS); + + /* enable/disable interrupts + * + * Set the Doorbell and PCI interrupt enable bits + * + * */ + if (enable) + reg |= (IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB); + else + reg &= ~(IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB); + lx_plx_reg_write(chip, ePLX_IRQCS, reg); +} + +void lx_irq_enable(struct lx6464es *chip) +{ + snd_printdd("->lx_irq_enable\n"); + lx_irq_set(chip, 1); +} + +void lx_irq_disable(struct lx6464es *chip) +{ + snd_printdd("->lx_irq_disable\n"); + lx_irq_set(chip, 0); +} diff --git a/sound/pci/lx6464es/lx_core.h b/sound/pci/lx6464es/lx_core.h new file mode 100644 index 00000000000..6bd9cbbbc68 --- /dev/null +++ b/sound/pci/lx6464es/lx_core.h @@ -0,0 +1,242 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * low-level interface + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX_CORE_H +#define LX_CORE_H + +#include + +#include "lx_defs.h" + +#define REG_CRM_NUMBER 12 + +struct lx6464es; + +/* low-level register access */ + +/* dsp register access */ +enum { + eReg_BASE, + eReg_CSM, + eReg_CRM1, + eReg_CRM2, + eReg_CRM3, + eReg_CRM4, + eReg_CRM5, + eReg_CRM6, + eReg_CRM7, + eReg_CRM8, + eReg_CRM9, + eReg_CRM10, + eReg_CRM11, + eReg_CRM12, + + eReg_ICR, + eReg_CVR, + eReg_ISR, + eReg_RXHTXH, + eReg_RXMTXM, + eReg_RHLTXL, + eReg_RESETDSP, + + eReg_CSUF, + eReg_CSES, + eReg_CRESMSB, + eReg_CRESLSB, + eReg_ADMACESMSB, + eReg_ADMACESLSB, + eReg_CONFES, + + eMaxPortLx +}; + +unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port); +void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len); +void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data); +void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data, + u32 len); + +/* plx register access */ +enum { + ePLX_PCICR, + + ePLX_MBOX0, + ePLX_MBOX1, + ePLX_MBOX2, + ePLX_MBOX3, + ePLX_MBOX4, + ePLX_MBOX5, + ePLX_MBOX6, + ePLX_MBOX7, + + ePLX_L2PCIDB, + ePLX_IRQCS, + ePLX_CHIPSC, + + eMaxPort +}; + +unsigned long lx_plx_reg_read(struct lx6464es *chip, int port); +void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data); + +/* rhm */ +struct lx_rmh { + u16 cmd_len; /* length of the command to send (WORDs) */ + u16 stat_len; /* length of the status received (WORDs) */ + u16 dsp_stat; /* status type, RMP_SSIZE_XXX */ + u16 cmd_idx; /* index of the command */ + u32 cmd[REG_CRM_NUMBER]; + u32 stat[REG_CRM_NUMBER]; +}; + + +/* low-level dsp access */ +int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version); +int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq); +int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran); +int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data); +int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address); + + +/* low-level pipe handling */ +int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture, + int channels); +int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *rsample_count); +int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate); +int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture); + +int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture); + +/* low-level stream handling */ +int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime, + u32 pipe, int is_capture); +int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture, + int *rstate); +int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *r_bytepos); + +int lx_stream_set_state(struct lx6464es *chip, u32 pipe, + int is_capture, enum stream_state_t state); + +static inline int lx_stream_start(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_start\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_RUN); +} + +static inline int lx_stream_pause(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_pause\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_PAUSE); +} + +static inline int lx_stream_stop(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_stop\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_STOP); +} + +/* low-level buffer handling */ +int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_needed, u32 *r_freed, u32 *size_array); +int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi, + u32 *r_buffer_index); +int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_buffer_size); +int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_index); + +/* low-level gain/peak handling */ +int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute); +int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels, + u32 *r_levels); + + +/* interrupt handling */ +irqreturn_t lx_interrupt(int irq, void *dev_id); +void lx_irq_enable(struct lx6464es *chip); +void lx_irq_disable(struct lx6464es *chip); + +void lx_tasklet_capture(unsigned long data); +void lx_tasklet_playback(unsigned long data); + + +/* Stream Format Header Defines (for LIN and IEEE754) */ +#define HEADER_FMT_BASE HEADER_FMT_BASE_LIN +#define HEADER_FMT_BASE_LIN 0xFED00000 +#define HEADER_FMT_BASE_FLOAT 0xFAD00000 +#define HEADER_FMT_MONO 0x00000080 /* bit 23 in header_lo. WARNING: old + * bit 22 is ignored in float + * format */ +#define HEADER_FMT_INTEL 0x00008000 +#define HEADER_FMT_16BITS 0x00002000 +#define HEADER_FMT_24BITS 0x00004000 +#define HEADER_FMT_UPTO11 0x00000200 /* frequency is less or equ. to 11k. + * */ +#define HEADER_FMT_UPTO32 0x00000100 /* frequency is over 11k and less + * then 32k.*/ + + +#define BIT_FMP_HEADER 23 +#define BIT_FMP_SD 22 +#define BIT_FMP_MULTICHANNEL 19 + +#define START_STATE 1 +#define PAUSE_STATE 0 + + + + + +/* from PcxAll_e.h */ +/* Start/Pause condition for pipes (PCXStartPipe, PCXPausePipe) */ +#define START_PAUSE_IMMEDIATE 0 +#define START_PAUSE_ON_SYNCHRO 1 +#define START_PAUSE_ON_TIME_CODE 2 + + +/* Pipe / Stream state */ +#define START_STATE 1 +#define PAUSE_STATE 0 + +static inline void unpack_pointer(dma_addr_t ptr, u32 *r_low, u32 *r_high) +{ + *r_low = (u32)(ptr & 0xffffffff); +#if BITS_PER_LONG == 32 + *r_high = 0; +#else + *r_high = (u32)((u64)ptr>>32); +#endif +} + +#endif /* LX_CORE_H */ diff --git a/sound/pci/lx6464es/lx_defs.h b/sound/pci/lx6464es/lx_defs.h new file mode 100644 index 00000000000..49d36bdd512 --- /dev/null +++ b/sound/pci/lx6464es/lx_defs.h @@ -0,0 +1,376 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * adapted upstream headers + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX_DEFS_H +#define LX_DEFS_H + +/* code adapted from ethersound.h */ +#define XES_FREQ_COUNT8_MASK 0x00001FFF /* compteur 25MHz entre 8 ech. */ +#define XES_FREQ_COUNT8_44_MIN 0x00001288 /* 25M / + * [ 44k - ( 44.1k + 48k ) / 2 ] + * * 8 */ +#define XES_FREQ_COUNT8_44_MAX 0x000010F0 /* 25M / [ ( 44.1k + 48k ) / 2 ] + * * 8 */ +#define XES_FREQ_COUNT8_48_MAX 0x00000F08 /* 25M / + * [ 48k + ( 44.1k + 48k ) / 2 ] + * * 8 */ + +/* code adapted from LXES_registers.h */ + +#define IOCR_OUTPUTS_OFFSET 0 /* (rw) offset for the number of OUTs in the + * ConfES register. */ +#define IOCR_INPUTS_OFFSET 8 /* (rw) offset for the number of INs in the + * ConfES register. */ +#define FREQ_RATIO_OFFSET 19 /* (rw) offset for frequency ratio in the + * ConfES register. */ +#define FREQ_RATIO_SINGLE_MODE 0x01 /* value for single mode frequency ratio: + * sample rate = frequency rate. */ + +#define CONFES_READ_PART_MASK 0x00070000 +#define CONFES_WRITE_PART_MASK 0x00F80000 + +/* code adapted from if_drv_mb.h */ + +#define MASK_SYS_STATUS_ERROR (1L << 31) /* events that lead to a PCI irq if + * not yet pending */ +#define MASK_SYS_STATUS_URUN (1L << 30) +#define MASK_SYS_STATUS_ORUN (1L << 29) +#define MASK_SYS_STATUS_EOBO (1L << 28) +#define MASK_SYS_STATUS_EOBI (1L << 27) +#define MASK_SYS_STATUS_FREQ (1L << 26) +#define MASK_SYS_STATUS_ESA (1L << 25) /* reserved, this is set by the + * XES */ +#define MASK_SYS_STATUS_TIMER (1L << 24) + +#define MASK_SYS_ASYNC_EVENTS (MASK_SYS_STATUS_ERROR | \ + MASK_SYS_STATUS_URUN | \ + MASK_SYS_STATUS_ORUN | \ + MASK_SYS_STATUS_EOBO | \ + MASK_SYS_STATUS_EOBI | \ + MASK_SYS_STATUS_FREQ | \ + MASK_SYS_STATUS_ESA) + +#define MASK_SYS_PCI_EVENTS (MASK_SYS_ASYNC_EVENTS | \ + MASK_SYS_STATUS_TIMER) + +#define MASK_SYS_TIMER_COUNT 0x0000FFFF + +#define MASK_SYS_STATUS_EOT_PLX (1L << 22) /* event that remains + * internal: reserved fo end + * of plx dma */ +#define MASK_SYS_STATUS_XES (1L << 21) /* event that remains + * internal: pending XES + * IRQ */ +#define MASK_SYS_STATUS_CMD_DONE (1L << 20) /* alternate command + * management: notify driver + * instead of polling */ + + +#define MAX_STREAM_BUFFER 5 /* max amount of stream buffers. */ + +#define MICROBLAZE_IBL_MIN 32 +#define MICROBLAZE_IBL_DEFAULT 128 +#define MICROBLAZE_IBL_MAX 512 +/* #define MASK_GRANULARITY (2*MICROBLAZE_IBL_MAX-1) */ + + + +/* command opcodes, see reference for details */ + +/* + the capture bit position in the object_id field in driver commands + depends upon the number of managed channels. For now, 64 IN + 64 OUT are + supported. HOwever, the communication protocol forsees 1024 channels, hence + bit 10 indicates a capture (input) object). +*/ +#define ID_IS_CAPTURE (1L << 10) +#define ID_OFFSET 13 /* object ID is at the 13th bit in the + * 1st command word.*/ +#define ID_CH_MASK 0x3F +#define OPCODE_OFFSET 24 /* offset of the command opcode in the first + * command word.*/ + +enum cmd_mb_opcodes { + CMD_00_INFO_DEBUG = 0x00, + CMD_01_GET_SYS_CFG = 0x01, + CMD_02_SET_GRANULARITY = 0x02, + CMD_03_SET_TIMER_IRQ = 0x03, + CMD_04_GET_EVENT = 0x04, + CMD_05_GET_PIPES = 0x05, + + CMD_06_ALLOCATE_PIPE = 0x06, + CMD_07_RELEASE_PIPE = 0x07, + CMD_08_ASK_BUFFERS = 0x08, + CMD_09_STOP_PIPE = 0x09, + CMD_0A_GET_PIPE_SPL_COUNT = 0x0a, + CMD_0B_TOGGLE_PIPE_STATE = 0x0b, + + CMD_0C_DEF_STREAM = 0x0c, + CMD_0D_SET_MUTE = 0x0d, + CMD_0E_GET_STREAM_SPL_COUNT = 0x0e, + CMD_0F_UPDATE_BUFFER = 0x0f, + CMD_10_GET_BUFFER = 0x10, + CMD_11_CANCEL_BUFFER = 0x11, + CMD_12_GET_PEAK = 0x12, + CMD_13_SET_STREAM_STATE = 0x13, + CMD_14_INVALID = 0x14, +}; + +/* pipe states */ +enum pipe_state_t { + PSTATE_IDLE = 0, /* the pipe is not processed in the XES_IRQ + * (free or stopped, or paused). */ + PSTATE_RUN = 1, /* sustained play/record state. */ + PSTATE_PURGE = 2, /* the ES channels are now off, render pipes do + * not DMA, record pipe do a last DMA. */ + PSTATE_ACQUIRE = 3, /* the ES channels are now on, render pipes do + * not yet increase their sample count, record + * pipes do not DMA. */ + PSTATE_CLOSING = 4, /* the pipe is releasing, and may not yet + * receive an "alloc" command. */ +}; + +/* stream states */ +enum stream_state_t { + SSTATE_STOP = 0x00, /* setting to stop resets the stream spl + * count.*/ + SSTATE_RUN = (0x01 << 0), /* start DMA and spl count handling. */ + SSTATE_PAUSE = (0x01 << 1), /* pause DMA and spl count handling. */ +}; + +/* buffer flags */ +enum buffer_flags { + BF_VALID = 0x80, /* set if the buffer is valid, clear if free.*/ + BF_CURRENT = 0x40, /* set if this is the current buffer (there is + * always a current buffer).*/ + BF_NOTIFY_EOB = 0x20, /* set if this buffer must cause a PCI event + * when finished.*/ + BF_CIRCULAR = 0x10, /* set if buffer[1] must be copied to buffer[0] + * by the end of this buffer.*/ + BF_64BITS_ADR = 0x08, /* set if the hi part of the address is valid.*/ + BF_xx = 0x04, /* future extension.*/ + BF_EOB = 0x02, /* set if finished, but not yet free.*/ + BF_PAUSE = 0x01, /* pause stream at buffer end.*/ + BF_ZERO = 0x00, /* no flags (init).*/ +}; + +/** +* Stream Flags definitions +*/ +enum stream_flags { + SF_ZERO = 0x00000000, /* no flags (stream invalid). */ + SF_VALID = 0x10000000, /* the stream has a valid DMA_conf + * info (setstreamformat). */ + SF_XRUN = 0x20000000, /* the stream is un x-run state. */ + SF_START = 0x40000000, /* the DMA is running.*/ + SF_ASIO = 0x80000000, /* ASIO.*/ +}; + + +#define MASK_SPL_COUNT_HI 0x00FFFFFF /* 4 MSBits are status bits */ +#define PSTATE_OFFSET 28 /* 4 MSBits are status bits */ + + +#define MASK_STREAM_HAS_MAPPING (1L << 12) +#define MASK_STREAM_IS_ASIO (1L << 9) +#define STREAM_FMT_OFFSET 10 /* the stream fmt bits start at the 10th + * bit in the command word. */ + +#define STREAM_FMT_16b 0x02 +#define STREAM_FMT_intel 0x01 + +#define FREQ_FIELD_OFFSET 15 /* offset of the freq field in the response + * word */ + +#define BUFF_FLAGS_OFFSET 24 /* offset of the buffer flags in the + * response word. */ +#define MASK_DATA_SIZE 0x00FFFFFF /* this must match the field size of + * datasize in the buffer_t structure. */ + +#define MASK_BUFFER_ID 0xFF /* the cancel command awaits a buffer ID, + * may be 0xFF for "current". */ + + +/* code adapted from PcxErr_e.h */ + +/* Bits masks */ + +#define ERROR_MASK 0x8000 + +#define SOURCE_MASK 0x7800 + +#define E_SOURCE_BOARD 0x4000 /* 8 >> 1 */ +#define E_SOURCE_DRV 0x2000 /* 4 >> 1 */ +#define E_SOURCE_API 0x1000 /* 2 >> 1 */ +/* Error tools */ +#define E_SOURCE_TOOLS 0x0800 /* 1 >> 1 */ +/* Error pcxaudio */ +#define E_SOURCE_AUDIO 0x1800 /* 3 >> 1 */ +/* Error virtual pcx */ +#define E_SOURCE_VPCX 0x2800 /* 5 >> 1 */ +/* Error dispatcher */ +#define E_SOURCE_DISPATCHER 0x3000 /* 6 >> 1 */ +/* Error from CobraNet firmware */ +#define E_SOURCE_COBRANET 0x3800 /* 7 >> 1 */ + +#define E_SOURCE_USER 0x7800 + +#define CLASS_MASK 0x0700 + +#define CODE_MASK 0x00FF + +/* Bits values */ + +/* Values for the error/warning bit */ +#define ERROR_VALUE 0x8000 +#define WARNING_VALUE 0x0000 + +/* Class values */ +#define E_CLASS_GENERAL 0x0000 +#define E_CLASS_INVALID_CMD 0x0100 +#define E_CLASS_INVALID_STD_OBJECT 0x0200 +#define E_CLASS_RSRC_IMPOSSIBLE 0x0300 +#define E_CLASS_WRONG_CONTEXT 0x0400 +#define E_CLASS_BAD_SPECIFIC_PARAMETER 0x0500 +#define E_CLASS_REAL_TIME_ERROR 0x0600 +#define E_CLASS_DIRECTSHOW 0x0700 +#define E_CLASS_FREE 0x0700 + + +/* Complete DRV error code for the general class */ +#define ED_GN (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_GENERAL) +#define ED_CONCURRENCY (ED_GN | 0x01) +#define ED_DSP_CRASHED (ED_GN | 0x02) +#define ED_UNKNOWN_BOARD (ED_GN | 0x03) +#define ED_NOT_INSTALLED (ED_GN | 0x04) +#define ED_CANNOT_OPEN_SVC_MANAGER (ED_GN | 0x05) +#define ED_CANNOT_READ_REGISTRY (ED_GN | 0x06) +#define ED_DSP_VERSION_MISMATCH (ED_GN | 0x07) +#define ED_UNAVAILABLE_FEATURE (ED_GN | 0x08) +#define ED_CANCELLED (ED_GN | 0x09) +#define ED_NO_RESPONSE_AT_IRQA (ED_GN | 0x10) +#define ED_INVALID_ADDRESS (ED_GN | 0x11) +#define ED_DSP_CORRUPTED (ED_GN | 0x12) +#define ED_PENDING_OPERATION (ED_GN | 0x13) +#define ED_NET_ALLOCATE_MEMORY_IMPOSSIBLE (ED_GN | 0x14) +#define ED_NET_REGISTER_ERROR (ED_GN | 0x15) +#define ED_NET_THREAD_ERROR (ED_GN | 0x16) +#define ED_NET_OPEN_ERROR (ED_GN | 0x17) +#define ED_NET_CLOSE_ERROR (ED_GN | 0x18) +#define ED_NET_NO_MORE_PACKET (ED_GN | 0x19) +#define ED_NET_NO_MORE_BUFFER (ED_GN | 0x1A) +#define ED_NET_SEND_ERROR (ED_GN | 0x1B) +#define ED_NET_RECEIVE_ERROR (ED_GN | 0x1C) +#define ED_NET_WRONG_MSG_SIZE (ED_GN | 0x1D) +#define ED_NET_WAIT_ERROR (ED_GN | 0x1E) +#define ED_NET_EEPROM_ERROR (ED_GN | 0x1F) +#define ED_INVALID_RS232_COM_NUMBER (ED_GN | 0x20) +#define ED_INVALID_RS232_INIT (ED_GN | 0x21) +#define ED_FILE_ERROR (ED_GN | 0x22) +#define ED_INVALID_GPIO_CMD (ED_GN | 0x23) +#define ED_RS232_ALREADY_OPENED (ED_GN | 0x24) +#define ED_RS232_NOT_OPENED (ED_GN | 0x25) +#define ED_GPIO_ALREADY_OPENED (ED_GN | 0x26) +#define ED_GPIO_NOT_OPENED (ED_GN | 0x27) +#define ED_REGISTRY_ERROR (ED_GN | 0x28) /* <- NCX */ +#define ED_INVALID_SERVICE (ED_GN | 0x29) /* <- NCX */ + +#define ED_READ_FILE_ALREADY_OPENED (ED_GN | 0x2a) /* <- Decalage + * pour RCX + * (old 0x28) + * */ +#define ED_READ_FILE_INVALID_COMMAND (ED_GN | 0x2b) /* ~ */ +#define ED_READ_FILE_INVALID_PARAMETER (ED_GN | 0x2c) /* ~ */ +#define ED_READ_FILE_ALREADY_CLOSED (ED_GN | 0x2d) /* ~ */ +#define ED_READ_FILE_NO_INFORMATION (ED_GN | 0x2e) /* ~ */ +#define ED_READ_FILE_INVALID_HANDLE (ED_GN | 0x2f) /* ~ */ +#define ED_READ_FILE_END_OF_FILE (ED_GN | 0x30) /* ~ */ +#define ED_READ_FILE_ERROR (ED_GN | 0x31) /* ~ */ + +#define ED_DSP_CRASHED_EXC_DSPSTACK_OVERFLOW (ED_GN | 0x32) /* <- Decalage pour + * PCX (old 0x14) */ +#define ED_DSP_CRASHED_EXC_SYSSTACK_OVERFLOW (ED_GN | 0x33) /* ~ */ +#define ED_DSP_CRASHED_EXC_ILLEGAL (ED_GN | 0x34) /* ~ */ +#define ED_DSP_CRASHED_EXC_TIMER_REENTRY (ED_GN | 0x35) /* ~ */ +#define ED_DSP_CRASHED_EXC_FATAL_ERROR (ED_GN | 0x36) /* ~ */ + +#define ED_FLASH_PCCARD_NOT_PRESENT (ED_GN | 0x37) + +#define ED_NO_CURRENT_CLOCK (ED_GN | 0x38) + +/* Complete DRV error code for real time class */ +#define ED_RT (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_REAL_TIME_ERROR) +#define ED_DSP_TIMED_OUT (ED_RT | 0x01) +#define ED_DSP_CHK_TIMED_OUT (ED_RT | 0x02) +#define ED_STREAM_OVERRUN (ED_RT | 0x03) +#define ED_DSP_BUSY (ED_RT | 0x04) +#define ED_DSP_SEMAPHORE_TIME_OUT (ED_RT | 0x05) +#define ED_BOARD_TIME_OUT (ED_RT | 0x06) +#define ED_XILINX_ERROR (ED_RT | 0x07) +#define ED_COBRANET_ITF_NOT_RESPONDING (ED_RT | 0x08) + +/* Complete BOARD error code for the invaid standard object class */ +#define EB_ISO (ERROR_VALUE | E_SOURCE_BOARD | \ + E_CLASS_INVALID_STD_OBJECT) +#define EB_INVALID_EFFECT (EB_ISO | 0x00) +#define EB_INVALID_PIPE (EB_ISO | 0x40) +#define EB_INVALID_STREAM (EB_ISO | 0x80) +#define EB_INVALID_AUDIO (EB_ISO | 0xC0) + +/* Complete BOARD error code for impossible resource allocation class */ +#define EB_RI (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_RSRC_IMPOSSIBLE) +#define EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE (EB_RI | 0x01) +#define EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE (EB_RI | 0x02) + +#define EB_ALLOCATE_MEM_STREAM_IMPOSSIBLE \ + EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE +#define EB_ALLOCATE_MEM_PIPE_IMPOSSIBLE \ + EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE + +#define EB_ALLOCATE_DIFFERED_CMD_IMPOSSIBLE (EB_RI | 0x03) +#define EB_TOO_MANY_DIFFERED_CMD (EB_RI | 0x04) +#define EB_RBUFFERS_TABLE_OVERFLOW (EB_RI | 0x05) +#define EB_ALLOCATE_EFFECTS_IMPOSSIBLE (EB_RI | 0x08) +#define EB_ALLOCATE_EFFECT_POS_IMPOSSIBLE (EB_RI | 0x09) +#define EB_RBUFFER_NOT_AVAILABLE (EB_RI | 0x0A) +#define EB_ALLOCATE_CONTEXT_LIII_IMPOSSIBLE (EB_RI | 0x0B) +#define EB_STATUS_DIALOG_IMPOSSIBLE (EB_RI | 0x1D) +#define EB_CONTROL_CMD_IMPOSSIBLE (EB_RI | 0x1E) +#define EB_STATUS_SEND_IMPOSSIBLE (EB_RI | 0x1F) +#define EB_ALLOCATE_PIPE_IMPOSSIBLE (EB_RI | 0x40) +#define EB_ALLOCATE_STREAM_IMPOSSIBLE (EB_RI | 0x80) +#define EB_ALLOCATE_AUDIO_IMPOSSIBLE (EB_RI | 0xC0) + +/* Complete BOARD error code for wrong call context class */ +#define EB_WCC (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_WRONG_CONTEXT) +#define EB_CMD_REFUSED (EB_WCC | 0x00) +#define EB_START_STREAM_REFUSED (EB_WCC | 0xFC) +#define EB_SPC_REFUSED (EB_WCC | 0xFD) +#define EB_CSN_REFUSED (EB_WCC | 0xFE) +#define EB_CSE_REFUSED (EB_WCC | 0xFF) + + + + +#endif /* LX_DEFS_H */ -- cgit v1.2.3-70-g09d2 From ea20d9293ce423a39717ed4375393129a2e701f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 08:54:16 -0400 Subject: tracing: consolidate trace and trace_event headers Impact: clean up Neil Horman (et. al.) criticized the way the trace events were broken up into two files. The reason for that was that ftrace needed to separate out the declarations from where the #include was used. It then dawned on me that the tracepoint.h header only needs to define the TRACE_EVENT macro if it is not already defined. The solution is simply to test if TRACE_EVENT is defined, and if it is not then the linux/tracepoint.h header can define it. This change consolidates all the .h and _event_types.h into the .h file. Reported-by: Neil Horman Reported-by: Theodore Tso Reported-by: Jiaying Zhang Cc: Zhaolei Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Jason Baron Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 9 +- include/trace/irq.h | 51 +++++- include/trace/irq_event_types.h | 55 ------ include/trace/kmem.h | 189 +++++++++++++++++++- include/trace/lockdep.h | 52 +++++- include/trace/lockdep_event_types.h | 57 ------ include/trace/sched.h | 333 ++++++++++++++++++++++++++++++++++- include/trace/sched_event_types.h | 337 ------------------------------------ include/trace/skb.h | 36 +++- include/trace/skb_event_types.h | 38 ---- include/trace/trace_event_types.h | 7 - kernel/trace/events.c | 1 + kernel/trace/trace_events_stage_1.h | 4 +- kernel/trace/trace_events_stage_2.h | 8 +- kernel/trace/trace_events_stage_3.h | 4 +- 15 files changed, 663 insertions(+), 518 deletions(-) delete mode 100644 include/trace/irq_event_types.h delete mode 100644 include/trace/lockdep_event_types.h delete mode 100644 include/trace/sched_event_types.h delete mode 100644 include/trace/skb_event_types.h delete mode 100644 include/trace/trace_event_types.h (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d35a7ee7611..4353f3f7e62 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,6 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +#ifndef DECLARE_TRACE + #define TP_PROTO(args...) args #define TP_ARGS(args...) args @@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } #endif /* CONFIG_TRACEPOINTS */ +#endif /* DECLARE_TRACE */ /* * Connect a probe to a tracepoint. @@ -154,10 +157,13 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args + +#ifndef TRACE_FORMAT #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif - +#ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * @@ -262,5 +268,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif #endif diff --git a/include/trace/irq.h b/include/trace/irq.h index ff5d4495dc3..04ab4c65222 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -1,9 +1,54 @@ -#ifndef _TRACE_IRQ_H +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_H -#include #include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); -#include +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); #endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h deleted file mode 100644 index 85964ebd47e..00000000000 --- a/include/trace/irq_event_types.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* use instead */ -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#undef TRACE_SYSTEM diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 46efc2423f0..d7d12189e5c 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,9 +1,192 @@ -#ifndef _TRACE_KMEM_H +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem -#endif /* _TRACE_KMEM_H */ +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#endif diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 5ca67df87f2..8ee7900b38c 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -1,9 +1,57 @@ -#ifndef _TRACE_LOCKDEP_H +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LOCKDEP_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h deleted file mode 100644 index 863f1e4583a..00000000000 --- a/include/trace/lockdep_event_types.h +++ /dev/null @@ -1,57 +0,0 @@ - -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lock - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#undef TRACE_SYSTEM diff --git a/include/trace/sched.h b/include/trace/sched.h index 4e372a1a29b..5b1cf4a2846 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -1,9 +1,336 @@ -#ifndef _TRACE_SCHED_H +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched -#endif +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h deleted file mode 100644 index 63547dc1125..00000000000 --- a/include/trace/sched_event_types.h +++ /dev/null @@ -1,337 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/skb.h b/include/trace/skb.h index d2de7174a6e..e6fd281f7f8 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -1,9 +1,37 @@ -#ifndef _TRACE_SKB_H_ -#define _TRACE_SKB_H_ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb -#endif +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h deleted file mode 100644 index 4a1c504c0e1..00000000000 --- a/include/trace/skb_event_types.h +++ /dev/null @@ -1,38 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h deleted file mode 100644 index 552a50e169a..00000000000 --- a/include/trace/trace_event_types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/_event_types.h here */ - -#include -#include -#include -#include -#include diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 246f2aa6dc4..5a35a914f0e 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -8,6 +8,7 @@ #include "trace_output.h" +#define TRACE_HEADER_MULTI_READ #include "trace_events_stage_1.h" #include "trace_events_stage_2.h" #include "trace_events_stage_3.h" diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 38985f9b379..475f46a047a 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -1,7 +1,7 @@ /* * Stage 1 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * struct ftrace_raw_ { * struct trace_entry ent; @@ -36,4 +36,4 @@ }; \ static struct ftrace_event_call event_##name -#include +#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 59cfd7dfe68..aa4a67a0656 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -1,7 +1,7 @@ /* * Stage 2 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * enum print_line_t * ftrace_raw_output_(struct trace_iterator *iter, int flags) @@ -64,7 +64,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ return TRACE_TYPE_HANDLED; \ } -#include +#include /* * Setup the showing format of trace point. @@ -128,7 +128,7 @@ ftrace_format_##call(struct trace_seq *s) \ return ret; \ } -#include +#include #undef __field #define __field(type, item) \ @@ -167,4 +167,4 @@ ftrace_define_fields_##call(void) \ return ret; \ } -#include +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 5bb1b7ffbdb..45c04e1f38d 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -1,7 +1,7 @@ /* * Stage 3 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * static void ftrace_event_(proto) * { @@ -272,7 +272,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } -#include +#include #undef _TRACE_PROFILE #undef _TRACE_PROFILE_INIT -- cgit v1.2.3-70-g09d2 From 78ddb08feb7d4fbe3c0a9931804c51ee58be4023 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 14 Apr 2009 16:53:05 +0200 Subject: wait: don't use __wake_up_common() '777c6c5 wait: prevent exclusive waiter starvation' made __wake_up_common() global to be used from abort_exclusive_wait(). It was needed to do a wake-up with the waitqueue lock held while passing down a key to the wake-up function. Since '4ede816 epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()' there is an appropriate wrapper for this case: __wake_up_locked_key(). Use it here and make __wake_up_common() private to the scheduler again. Signed-off-by: Johannes Weiner Cc: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <1239720785-19661-1-git-send-email-hannes@cmpxchg.org> Signed-off-by: Ingo Molnar --- include/linux/wait.h | 2 -- kernel/sched.c | 2 +- kernel/wait.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eae..67d4e89b62d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, diff --git a/kernel/sched.c b/kernel/sched.c index 36d213bca47..92b4b56ad09 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5345,7 +5345,7 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, +static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; diff --git a/kernel/wait.c b/kernel/wait.c index 42a2dbc181c..ea7c3b4275c 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, if (!list_empty(&wait->task_list)) list_del_init(&wait->task_list); else if (waitqueue_active(q)) - __wake_up_common(q, mode, 1, 0, key); + __wake_up_locked_key(q, mode, key); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(abort_exclusive_wait); -- cgit v1.2.3-70-g09d2 From 72c6a9870f901045f2464c3dc6ee8914bfdc07aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 17:33:57 +0200 Subject: rculist.h: introduce list_entry_rcu() and list_first_entry_rcu() I've run into the situation where I need to use list_first_entry with rcu-guarded list. This patch introduces this. Also simplify list_for_each_entry_rcu() to use new list_entry_rcu() instead of list_entry(). Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414153356.GC3999@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c9..5710f43bbc9 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v1.2.3-70-g09d2 From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 09:36:00 -0400 Subject: tracing: create automated trace defines This patch lowers the number of places a developer must modify to add new tracepoints. The current method to add a new tracepoint into an existing system is to write the trace point macro in the trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or DECLARE_TRACE, then they must add the same named item into the C file with the macro DEFINE_TRACE(name) and then add the trace point. This change cuts out the needing to add the DEFINE_TRACE(name). Every file that uses the tracepoint must still include the trace/.h file, but the one C file must also add a define before the including of that file. #define CREATE_TRACE_POINTS #include This will cause the trace/mytrace.h file to also produce the C code necessary to implement the trace point. Note, if more than one trace/.h is used to create the C code it is best to list them all together. #define CREATE_TRACE_POINTS #include #include #include Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with the cleaner solution of the define above the includes over my first design to have the C code include a "special" header. This patch converts sched, irq and lockdep and skb to use this new method. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 75 ++++++++++++++++++++++++++++++++++++++++++++ include/trace/irq.h | 5 ++- include/trace/kmem.h | 4 ++- include/trace/lockdep.h | 3 ++ include/trace/sched.h | 3 ++ include/trace/skb.h | 3 ++ kernel/exit.c | 4 --- kernel/fork.c | 2 -- kernel/irq/handle.c | 7 ++--- kernel/kthread.c | 3 -- kernel/lockdep.c | 12 ++----- kernel/sched.c | 10 ++---- kernel/signal.c | 2 -- kernel/softirq.c | 3 -- mm/util.c | 11 ++----- net/core/net-traces.c | 4 +-- 16 files changed, 105 insertions(+), 46 deletions(-) create mode 100644 include/trace/define_trace.h (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 00000000000..de9dc7d8508 --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef TRACE_FORMAT +#define TRACE_FORMAT(name, proto, args, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_FILE +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/irq.h b/include/trace/irq.h index 04ab4c65222..75e3468e449 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit, TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ); -#endif +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h index d7d12189e5c..c22c42f980b 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); +#endif /* _TRACE_KMEM_H */ -#endif +/* This part must be outside protection */ +#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 8ee7900b38c..4d301e758de 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired, #endif #endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/sched.h b/include/trace/sched.h index 5b1cf4a2846..ffa1cab586b 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send, ); #endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/skb.h b/include/trace/skb.h index e6fd281f7f8..1e8fabb57c0 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb, ); #endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3b95c..2fe9d2c7eee 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,10 +56,6 @@ #include #include "cred-internals.h" -DEFINE_TRACE(sched_process_free); -DEFINE_TRACE(sched_process_exit); -DEFINE_TRACE(sched_process_wait); - static void exit_mm(struct task_struct * tsk); static void __unhash_process(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd0072..4bebf263923 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -83,8 +83,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -DEFINE_TRACE(sched_process_fork); - int nr_processes(void) { int cpu; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142be8dd..983d8be8dff 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,9 +17,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + #include "internals.h" /* @@ -348,9 +350,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) "but no thread function available.", irq, action->name); } -DEFINE_TRACE(irq_handler_entry); -DEFINE_TRACE(irq_handler_exit); - /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ebaf8519ab..e1c76924545 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; -DEFINE_TRACE(sched_kthread_stop); -DEFINE_TRACE(sched_kthread_stop_ret); - struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c4582a6ea95..257f21a76c5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -42,12 +42,14 @@ #include #include #include -#include #include #include "lockdep_internals.h" +#define CREATE_TRACE_POINTS +#include + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -2929,8 +2931,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name, } EXPORT_SYMBOL_GPL(lock_set_class); -DEFINE_TRACE(lock_acquire); - /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: @@ -2957,8 +2957,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, } EXPORT_SYMBOL_GPL(lock_acquire); -DEFINE_TRACE(lock_release); - void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { @@ -3061,8 +3059,6 @@ found_it: put_lock_stats(stats); } -DEFINE_TRACE(lock_acquired); - static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { @@ -3118,8 +3114,6 @@ found_it: lock->ip = ip; } -DEFINE_TRACE(lock_contended); - void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; diff --git a/kernel/sched.c b/kernel/sched.c index 5724508c3b6..e6d4518d47e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,13 +72,15 @@ #include #include #include -#include #include #include #include "sched_cpupri.h" +#define CREATE_TRACE_POINTS +#include + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -118,12 +120,6 @@ */ #define RUNTIME_INF ((u64)~0ULL) -DEFINE_TRACE(sched_wait_task); -DEFINE_TRACE(sched_wakeup); -DEFINE_TRACE(sched_wakeup_new); -DEFINE_TRACE(sched_switch); -DEFINE_TRACE(sched_migrate_task); - #ifdef CONFIG_SMP static void double_rq_lock(struct rq *rq1, struct rq *rq2); diff --git a/kernel/signal.c b/kernel/signal.c index d8034737db4..1d5703ff003 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,8 +41,6 @@ static struct kmem_cache *sigqueue_cachep; -DEFINE_TRACE(sched_signal_send); - static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/kernel/softirq.c b/kernel/softirq.c index 2fecefacdc5..a2d9b458ac2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -186,9 +186,6 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 -DEFINE_TRACE(softirq_entry); -DEFINE_TRACE(softirq_exit); - asmlinkage void __do_softirq(void) { struct softirq_action *h; diff --git a/mm/util.c b/mm/util.c index 2599e83eea1..0e74a22791c 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,9 +4,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -239,13 +241,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, EXPORT_SYMBOL_GPL(get_user_pages_fast); /* Tracepoints definitions. */ -DEFINE_TRACE(kmalloc); -DEFINE_TRACE(kmem_cache_alloc); -DEFINE_TRACE(kmalloc_node); -DEFINE_TRACE(kmem_cache_alloc_node); -DEFINE_TRACE(kfree); -DEFINE_TRACE(kmem_cache_free); - EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb45665e4..80177205947 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -19,11 +19,11 @@ #include #include #include -#include #include #include +#define CREATE_TRACE_POINTS +#include -DEFINE_TRACE(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); -- cgit v1.2.3-70-g09d2 From 9504504cbab29ecb694186b1c5b15d3579c43c51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 11 Apr 2009 12:59:57 -0400 Subject: tracing: make trace_seq operations available for core kernel In the process to make TRACE_EVENT macro work for modules, the trace_seq operations must be available for core kernel code. These operations are quite useful and can be used for other implementations. The main idea is that we create a trace_seq handle that acts very much like the seq_file handle. struct trace_seq *s = kmalloc(sizeof(*s, GFP_KERNEL); trace_seq_init(s); trace_seq_printf(s, "some data %d\n", variable); printk("%s", s->buffer); The main use is to allow a top level function call several other functions that may store printf like data into the buffer. Then at the end, the top level function can process all the data with any method it would like to. It could be passed to userspace, output via printk or even use seq_file: trace_seq_to_user(s, ubuf, cnt); seq_puts(m, s->buffer); Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 89 +++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 15 +------- kernel/trace/trace_output.h | 16 +------- 3 files changed, 92 insertions(+), 28 deletions(-) create mode 100644 include/linux/trace_seq.h (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h new file mode 100644 index 00000000000..28051da876d --- /dev/null +++ b/include/linux/trace_seq.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_TRACE_SEQ_H +#define _LINUX_TRACE_SEQ_H + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE. + */ + +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; + unsigned int readpos; +}; + +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + +/* + * Currently only defined when tracing is enabled. + */ +#ifdef CONFIG_TRACING +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); + +#else /* CONFIG_TRACING */ +static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) +{ + return 0; +} +static inline int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + return 0; +} + +static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ +} +static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt) +{ + return 0; +} +static inline int trace_seq_puts(struct trace_seq *s, const char *str) +{ + return 0; +} +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +{ + return 0; +} +static inline int +trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + return 0; +} +static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len) +{ + return 0; +} +static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + return NULL; +} +static inline int trace_seq_path(struct trace_seq *s, struct path *path) +{ + return 0; +} +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_SEQ_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b05b6ac982a..1882846b738 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -12,6 +12,8 @@ #include #include +#include + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -423,19 +425,6 @@ struct tracer { struct tracer_stat *stats; }; -struct trace_seq { - unsigned char buffer[PAGE_SIZE]; - unsigned int len; - unsigned int readpos; -}; - -static inline void -trace_seq_init(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - #define TRACE_PIPE_ALL_CPU -1 diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 91630217fb4..5c7cbfb65c7 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -1,6 +1,7 @@ #ifndef __TRACE_EVENTS_H #define __TRACE_EVENTS_H +#include #include "trace.h" typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, @@ -20,24 +21,9 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter); -extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); - -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); -extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); -extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, - size_t len); -extern void *trace_seq_reserve(struct trace_seq *s, size_t len); -extern int trace_seq_path(struct trace_seq *s, struct path *path); extern int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, unsigned long sym_flags); extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, -- cgit v1.2.3-70-g09d2 From 97f2025153499faa17267a0d4e18c7afaf73f39d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 11:20:49 -0400 Subject: tracing/events: move declarations from trace directory to core include In preparation to allowing trace events to happen in modules, we need to move some of the local declarations in the kernel/trace directory into include/linux. This patch simply moves the declarations and performs no context changes. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 146 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 120 +---------------------------------- kernel/trace/trace_output.h | 14 ----- 3 files changed, 147 insertions(+), 133 deletions(-) create mode 100644 include/linux/ftrace_event.h (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h new file mode 100644 index 00000000000..496b76d9f9d --- /dev/null +++ b/include/linux/ftrace_event.h @@ -0,0 +1,146 @@ +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include +#include + + +struct trace_array; +struct tracer; + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned char type; + unsigned char flags; + unsigned char preempt_count; + int pid; + int tgid; +}; + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + int cpu; + u64 ts; + + unsigned long iter_flags; + loff_t pos; + long idx; + + cpumask_var_t started; +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags); +struct trace_event { + struct hlist_node node; + int type; + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + + +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; + int n_preds; + struct filter_pred **preds; + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif +}; + +#define MAX_FILTER_PRED 8 +#define MAX_FILTER_STR_VAL 128 + +extern int init_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); + +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); + + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + +#endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1882846b738..6bcdf4af9b2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -13,6 +13,7 @@ #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -43,20 +44,6 @@ enum trace_type { __TRACE_LAST_TYPE, }; -/* - * The trace entry - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_entry { - unsigned char type; - unsigned char flags; - unsigned char preempt_count; - int pid; - int tgid; -}; - /* * Function trace entry - function address and parent function addres: */ @@ -265,8 +252,6 @@ struct trace_array_cpu { char comm[TASK_COMM_LEN]; }; -struct trace_iterator; - /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -341,15 +326,6 @@ extern void __ftrace_bad_type(void); __ftrace_bad_type(); \ } while (0) -/* Return values for print_line callback */ -enum print_line_t { - TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ - TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ - TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ -}; - - /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the @@ -428,31 +404,6 @@ struct tracer { #define TRACE_PIPE_ALL_CPU -1 -/* - * Trace iterator - used by printout routines who present trace - * results to users and which routines might sleep, etc: - */ -struct trace_iterator { - struct trace_array *tr; - struct tracer *trace; - void *private; - int cpu_file; - struct mutex mutex; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; - - /* The below is zeroed out in pipe_read */ - struct trace_seq seq; - struct trace_entry *ent; - int cpu; - u64 ts; - - unsigned long iter_flags; - loff_t pos; - long idx; - - cpumask_var_t started; -}; - int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); @@ -479,15 +430,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr, struct ring_buffer_event *event, unsigned long flags, int pc); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, - unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event); - struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -510,7 +452,6 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, @@ -790,28 +731,6 @@ struct ftrace_event_field { int size; }; -struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); - int (*define_fields)(void); - struct list_head fields; - int n_preds; - struct filter_pred **preds; - -#ifdef CONFIG_EVENT_PROFILE - atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); -#endif -}; - struct event_subsystem { struct list_head list; const char *name; @@ -825,9 +744,6 @@ struct event_subsystem { (unsigned long)event < (unsigned long)__stop_ftrace_events; \ event++) -#define MAX_FILTER_PRED 8 -#define MAX_FILTER_STR_VAL 128 - struct filter_pred; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); @@ -845,9 +761,6 @@ struct filter_pred { int clear; }; -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); -extern int init_preds(struct ftrace_event_call *call); extern void filter_free_pred(struct filter_pred *pred); extern void filter_print_preds(struct filter_pred **preds, int n_preds, struct trace_seq *s); @@ -855,13 +768,9 @@ extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_disable_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); -extern int filter_current_check_discard(struct ftrace_event_call *call, - void *rec, - struct ring_buffer_event *event); static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, @@ -876,14 +785,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -#define __common_field(type, item) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ - if (ret) \ - return ret; - -void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; @@ -895,25 +796,6 @@ extern struct ftrace_event_call __stop_ftrace_events[]; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) - #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ extern struct ftrace_event_call event_##call; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 5c7cbfb65c7..6e220a8e570 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -4,18 +4,6 @@ #include #include "trace.h" -typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, - int flags); - -struct trace_event { - struct hlist_node node; - int type; - trace_print_func trace; - trace_print_func raw; - trace_print_func hex; - trace_print_func binary; -}; - extern enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t @@ -33,8 +21,6 @@ extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); extern struct trace_event *ftrace_find_event(int type); -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); -- cgit v1.2.3-70-g09d2 From f42c85e74faa422cf0bc747ed808681145448f88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 12:25:37 -0400 Subject: tracing/events: move the ftrace event tracing code to core This patch moves the ftrace creation into include/trace/ftrace.h and simplifies the work of developers in adding new tracepoints. Just the act of creating the trace points in include/trace and including define_trace.h will create the events in the debugfs/tracing/events directory. This patch removes the need of include/trace/trace_events.h Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 4 + include/trace/ftrace.h | 492 ++++++++++++++++++++++++++++++++++++ include/trace/trace_events.h | 7 - kernel/trace/Makefile | 1 - kernel/trace/events.c | 15 -- kernel/trace/trace_events_stage_1.h | 39 --- kernel/trace/trace_events_stage_2.h | 170 ------------- kernel/trace/trace_events_stage_3.h | 279 -------------------- 8 files changed, 496 insertions(+), 511 deletions(-) create mode 100644 include/trace/ftrace.h delete mode 100644 include/trace/trace_events.h delete mode 100644 kernel/trace/events.c delete mode 100644 kernel/trace/trace_events_stage_1.h delete mode 100644 kernel/trace/trace_events_stage_2.h delete mode 100644 kernel/trace/trace_events_stage_3.h (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index de9dc7d8508..980eb66a6e3 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_TRACER +#include +#endif + #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h new file mode 100644 index 00000000000..955b967acd7 --- /dev/null +++ b/include/trace/ftrace.h @@ -0,0 +1,492 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in to include the following: + * + * struct ftrace_raw_ { + * struct trace_entry ent; + * ; + * []; + * [...] + * }; + * + * The is created by the __field(type, item) macro or + * the __array(type2, item2, len) macro. + * We simply do "type item;", and that will create the fields + * in the structure. + */ + +#include + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __field +#define __field(type, item) type item; + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 2 of the trace events. + * + * Override the macros in to include the following: + * + * enum print_line_t + * ftrace_raw_output_(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_ *field; <-- defined in stage 1 + * struct trace_entry *entry; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * ret = trace_seq_printf(s, "\n"); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + ret = trace_seq_printf(s, #call ": " print); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry REC + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * + * Override the macros in to include the following: + * + * static void ftrace_event_(proto) + * { + * event_trace_printk(_RET_IP_, ": " ); + * } + * + * static int ftrace_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_event_); + * } + * + * For those macros defined with TRACE_FORMAT: + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * } + * + * + * For those macros defined with TRACE_EVENT: + * + * static struct ftrace_event_call event_; + * + * static void ftrace_raw_event_(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the __field and + * __array macros. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_raw_event_); + * } + * + * static struct trace_event ftrace_event_type_ = { + * .trace = ftrace_raw_output_, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .system = "", + * .raw_init = ftrace_raw_init_event_, + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * .show_format = ftrace_format_, + * } + * + */ + +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args + +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +{ \ + if (atomic_add_negative(-1, &call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + +#define _TRACE_FORMAT(call, proto, args, fmt) \ +static void ftrace_event_##call(proto) \ +{ \ + event_trace_printk(_RET_IP_, #call ": " fmt); \ +} \ + \ +static int ftrace_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_event_##call); \ +} \ + \ +static struct ftrace_event_call event_##call; \ + \ +static int ftrace_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(NULL); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + return 0; \ +} + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_init_event_##call, \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#undef __entry +#define __entry entry + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ftrace_event_call *call = &event_##call; \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(struct ftrace_raw_##call), \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + assign; \ + \ + if (!filter_current_check_discard(call, entry, event)) \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h deleted file mode 100644 index 13d6b85668c..00000000000 --- a/include/trace/trace_events.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/.h here */ - -#include -#include -#include -#include -#include diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3ad367e7c97..fb9d7f96489 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_EVENT_TRACING) += trace_events.o -obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o diff --git a/kernel/trace/events.c b/kernel/trace/events.c deleted file mode 100644 index 5a35a914f0e..00000000000 --- a/kernel/trace/events.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This is the place to register all trace points as events. - */ - -#include - -#include - -#include "trace_output.h" - -#define TRACE_HEADER_MULTI_READ -#include "trace_events_stage_1.h" -#include "trace_events_stage_2.h" -#include "trace_events_stage_3.h" - diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h deleted file mode 100644 index 475f46a047a..00000000000 --- a/kernel/trace/trace_events_stage_1.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Stage 1 of the trace events. - * - * Override the macros in to include the following: - * - * struct ftrace_raw_ { - * struct trace_entry ent; - * ; - * []; - * [...] - * }; - * - * The is created by the __field(type, item) macro or - * the __array(type2, item2, len) macro. - * We simply do "type item;", and that will create the fields - * in the structure. - */ - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __field -#define __field(type, item) type item; - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name - -#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h deleted file mode 100644 index aa4a67a0656..00000000000 --- a/kernel/trace/trace_events_stage_2.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Stage 2 of the trace events. - * - * Override the macros in to include the following: - * - * enum print_line_t - * ftrace_raw_output_(struct trace_iterator *iter, int flags) - * { - * struct trace_seq *s = &iter->seq; - * struct ftrace_raw_ *field; <-- defined in stage 1 - * struct trace_entry *entry; - * int ret; - * - * entry = iter->ent; - * - * if (entry->type != event_.id) { - * WARN_ON_ONCE(1); - * return TRACE_TYPE_UNHANDLED; - * } - * - * field = (typeof(field))entry; - * - * ret = trace_seq_printf(s, "\n"); - * if (!ret) - * return TRACE_TYPE_PARTIAL_LINE; - * - * return TRACE_TYPE_HANDLED; - * } - * - * This is the method used to print the raw event to the trace - * output format. Note, this is not needed if the data is read - * in binary. - */ - -#undef __entry -#define __entry field - -#undef TP_printk -#define TP_printk(fmt, args...) fmt "\n", args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ -{ \ - struct trace_seq *s = &iter->seq; \ - struct ftrace_raw_##call *field; \ - struct trace_entry *entry; \ - int ret; \ - \ - entry = iter->ent; \ - \ - if (entry->type != event_##call.id) { \ - WARN_ON_ONCE(1); \ - return TRACE_TYPE_UNHANDLED; \ - } \ - \ - field = (typeof(field))entry; \ - \ - ret = trace_seq_printf(s, #call ": " print); \ - if (!ret) \ - return TRACE_TYPE_PARTIAL_LINE; \ - \ - return TRACE_TYPE_HANDLED; \ -} - -#include - -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __entry -#define __entry REC - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include - -#undef __field -#define __field(type, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#undef __array -#define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -int \ -ftrace_define_fields_##call(void) \ -{ \ - struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ - int ret; \ - \ - __common_field(unsigned char, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ - \ - tstruct; \ - \ - return ret; \ -} - -#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h deleted file mode 100644 index 45c04e1f38d..00000000000 --- a/kernel/trace/trace_events_stage_3.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Stage 3 of the trace events. - * - * Override the macros in to include the following: - * - * static void ftrace_event_(proto) - * { - * event_trace_printk(_RET_IP_, ": " ); - * } - * - * static int ftrace_reg_event_(void) - * { - * int ret; - * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; - * } - * - * static void ftrace_unreg_event_(void) - * { - * unregister_trace_(ftrace_event_); - * } - * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * } - * - * - * For those macros defined with TRACE_EVENT: - * - * static struct ftrace_event_call event_; - * - * static void ftrace_raw_event_(proto) - * { - * struct ring_buffer_event *event; - * struct ftrace_raw_ *entry; <-- defined in stage 1 - * unsigned long irq_flags; - * int pc; - * - * local_save_flags(irq_flags); - * pc = preempt_count(); - * - * event = trace_current_buffer_lock_reserve(event_.id, - * sizeof(struct ftrace_raw_), - * irq_flags, pc); - * if (!event) - * return; - * entry = ring_buffer_event_data(event); - * - * ; <-- Here we assign the entries by the __field and - * __array macros. - * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); - * } - * - * static int ftrace_raw_reg_event_(void) - * { - * int ret; - * - * ret = register_trace_(ftrace_raw_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; - * } - * - * static void ftrace_unreg_event_(void) - * { - * unregister_trace_(ftrace_raw_event_); - * } - * - * static struct trace_event ftrace_event_type_ = { - * .trace = ftrace_raw_output_, <-- stage 2 - * }; - * - * static int ftrace_raw_init_event_(void) - * { - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; - * } - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .system = "", - * .raw_init = ftrace_raw_init_event_, - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * .show_format = ftrace_format_, - * } - * - */ - -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args - -#ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ -{ \ - if (atomic_add_negative(-1, &call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} - -#define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = ftrace_profile_enable_##call, \ - .profile_disable = ftrace_profile_disable_##call, - -#else -#define _TRACE_PROFILE(call, proto, args) -#define _TRACE_PROFILE_INIT(call) -#endif - -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#undef __entry -#define __entry entry - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ - \ -static struct ftrace_event_call event_##call; \ - \ -static void ftrace_raw_event_##call(proto) \ -{ \ - struct ftrace_event_call *call = &event_##call; \ - struct ring_buffer_event *event; \ - struct ftrace_raw_##call *entry; \ - unsigned long irq_flags; \ - int pc; \ - \ - local_save_flags(irq_flags); \ - pc = preempt_count(); \ - \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ - if (!event) \ - return; \ - entry = ring_buffer_event_data(event); \ - \ - assign; \ - \ - if (!filter_current_check_discard(call, entry, event)) \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ -} \ - \ -static int ftrace_raw_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_raw_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_raw_event_##call); \ -} \ - \ -static struct trace_event ftrace_event_type_##call = { \ - .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ - return 0; \ -} \ - \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ - .regfunc = ftrace_raw_reg_event_##call, \ - .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#include - -#undef _TRACE_PROFILE -#undef _TRACE_PROFILE_INIT - -- cgit v1.2.3-70-g09d2 From a59fd6027218bd7c994e39d14afe0242f895144f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 13:52:20 -0400 Subject: tracing/events: convert event call sites to use a link list Impact: makes it possible to define events in modules The events are created by reading down the section that they are linked in by the macros. But this is not scalable to modules. This patch converts the manipulations to use a global link list, and on boot up it adds the items in the section to the list. This change will allow modules to add their tracing events to the list as well. Note, this change alone does not permit modules to use the TRACE_EVENT macros, but the change is needed for them to eventually do so. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace.h | 13 +--------- kernel/trace/trace_event_profile.c | 4 +-- kernel/trace/trace_events.c | 51 +++++++++++++++++++++++--------------- kernel/trace/trace_events_filter.c | 8 +++--- 5 files changed, 39 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 496b76d9f9d..17810853b4f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); struct ftrace_event_call { + struct list_head list; char *name; char *system; struct dentry *dir; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6bcdf4af9b2..8817c18ef97 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -739,11 +739,6 @@ struct event_subsystem { struct filter_pred **preds; }; -#define events_for_each(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - struct filter_pred; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); @@ -785,13 +780,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; - -#define for_each_event(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) +extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 199de9c7422..7bf2ad65eee 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -11,7 +11,7 @@ int ftrace_profile_enable(int event_id) { struct ftrace_event_call *event; - for_each_event(event) { + list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) return event->profile_enable(event); } @@ -23,7 +23,7 @@ void ftrace_profile_disable(int event_id) { struct ftrace_event_call *event; - for_each_event(event) { + list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) return event->profile_disable(event); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ead68ac9919..5c66aaff07c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,6 +19,8 @@ static DEFINE_MUTEX(event_mutex); +LIST_HEAD(ftrace_events); + int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size) { @@ -54,16 +56,14 @@ err: static void ftrace_clear_events(void) { - struct ftrace_event_call *call = (void *)__start_ftrace_events; - + struct ftrace_event_call *call; - while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { + list_for_each_entry(call, &ftrace_events, list) { if (call->enabled) { call->enabled = 0; call->unregfunc(); } - call++; } } @@ -89,7 +89,7 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, static int ftrace_set_clr_event(char *buf, int set) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; char *event = NULL, *sub = NULL, *match; int ret = -EINVAL; @@ -118,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set) } mutex_lock(&event_mutex); - for_each_event(call) { + list_for_each_entry(call, &ftrace_events, list) { if (!call->name || !call->regfunc) continue; @@ -224,15 +224,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next = call; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; for (;;) { - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. @@ -240,11 +242,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if (call->regfunc) break; - call++; - next = call; + list = list->next; } - m->private = ++next; + m->private = list->next; return call; } @@ -257,22 +258,23 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; retry: - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + if (!call->enabled) { - call++; + list = list->next; goto retry; } - next = call; - m->private = ++next; + m->private = list->next; return call; } @@ -312,7 +314,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; - m->private = __start_ftrace_events; + m->private = ftrace_events.next; } return ret; } @@ -797,9 +799,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return 0; } +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; + +#define for_each_event(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + static __init int event_trace_init(void) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; @@ -830,6 +840,7 @@ static __init int event_trace_init(void) /* The linker may leave blanks */ if (!call->name) continue; + list_add(&call->list, &ftrace_events); event_create_dir(call, d_events); } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index de42dad42a8..d30b06b02b4 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -223,7 +223,7 @@ oom: void filter_free_subsystem_preds(struct event_subsystem *system) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; int i; if (system->n_preds) { @@ -234,7 +234,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system) system->n_preds = 0; } - events_for_each(call) { + list_for_each_entry(call, &ftrace_events, list) { if (!call->define_fields) continue; @@ -320,7 +320,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; if (system->n_preds && !pred->compound) filter_free_subsystem_preds(system); @@ -337,7 +337,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, system->preds[system->n_preds] = pred; - events_for_each(call) { + list_for_each_entry(call, &ftrace_events, list) { int err; if (!call->define_fields) -- cgit v1.2.3-70-g09d2 From 6d723736e472f7a0cd5b62c84152fceead241328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 14:53:50 -0400 Subject: tracing/events: add support for modules to TRACE_EVENT Impact: allow modules to add TRACE_EVENTS on load This patch adds the final hooks to allow modules to use the TRACE_EVENT macro. A notifier and a data structure are used to link the TRACE_EVENTs defined in the module to connect them with the ftrace event tracing system. It also adds the necessary automated clean ups to the trace events when a module is removed. Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 + include/linux/module.h | 4 ++ include/linux/trace_seq.h | 2 + include/trace/ftrace.h | 1 + kernel/module.c | 7 +++ kernel/trace/trace_events.c | 128 ++++++++++++++++++++++++++++++++----------- 6 files changed, 113 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 17810853b4f..75f3ac01a87 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -7,6 +7,7 @@ struct trace_array; struct tracer; +struct dentry; /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +88,7 @@ struct ftrace_event_call { char *name; char *system; struct dentry *dir; + struct trace_event *event; int enabled; int (*regfunc)(void); void (*unregfunc)(void); @@ -97,6 +99,7 @@ struct ftrace_event_call { struct list_head fields; int n_preds; struct filter_pred **preds; + void *mod; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; diff --git a/include/linux/module.h b/include/linux/module.h index 627ac082e2a..6155fa44168 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -337,6 +337,10 @@ struct module const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *trace_events; + unsigned int num_trace_events; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 28051da876d..15ca2c71af1 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TRACE_SEQ_H #define _LINUX_TRACE_SEQ_H +#include + /* * Trace sequences are used to allow a function to call several other functions * to create a string of data to use (up to a max of PAGE_SIZE. diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 955b967acd7..60c5323bee6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -477,6 +477,7 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ diff --git a/kernel/module.c b/kernel/module.c index e797812a4d9..a0394706f10 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include @@ -2172,6 +2173,12 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(hdr, sechdrs, secstrings, + "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8b9e621b80b..a4b177720a6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -713,7 +713,13 @@ event_subsystem_dir(const char *name, struct dentry *d_events) return d_events; } - system->name = name; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) { + debugfs_remove(system->entry); + kfree(system); + return d_events; + } + list_add(&system->list, &event_subsystems); system->preds = NULL; @@ -738,7 +744,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". */ - if (strcmp(call->system, "TRACE_SYSTEM") != 0) + if (strcmp(call->system, TRACE_SYSTEM) != 0) d_events = event_subsystem_dir(call->system, d_events); if (call->raw_init) { @@ -757,21 +763,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return -1; } - if (call->regfunc) { - entry = debugfs_create_file("enable", 0644, call->dir, call, - &ftrace_enable_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/enable' entry\n", call->name); - } + if (call->regfunc) + entry = trace_create_file("enable", 0644, call->dir, call, + &ftrace_enable_fops); - if (call->id) { - entry = debugfs_create_file("id", 0444, call->dir, call, - &ftrace_event_id_fops); - if (!entry) - pr_warning("Could not create debugfs '%s/id' entry\n", - call->name); - } + if (call->id) + entry = trace_create_file("id", 0444, call->dir, call, + &ftrace_event_id_fops); if (call->define_fields) { ret = call->define_fields(); @@ -780,40 +778,102 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) " events/%s\n", call->name); return ret; } - entry = debugfs_create_file("filter", 0644, call->dir, call, - &ftrace_event_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", call->name); + entry = trace_create_file("filter", 0644, call->dir, call, + &ftrace_event_filter_fops); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = debugfs_create_file("format", 0444, call->dir, call, - &ftrace_event_format_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/format' entry\n", call->name); + entry = trace_create_file("format", 0444, call->dir, call, + &ftrace_event_format_fops); + + return 0; +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +static void trace_module_add_events(struct module *mod) +{ + struct ftrace_event_call *call, *start, *end; + struct dentry *d_events; + + start = mod->trace_events; + end = mod->trace_events + mod->num_trace_events; + + if (start == end) + return; + + d_events = event_trace_events_dir(); + if (!d_events) + return; + + for_each_event(call, start, end) { + /* The linker may leave blanks */ + if (!call->name) + continue; + call->mod = mod; + list_add(&call->list, &ftrace_events); + event_create_dir(call, d_events); + } +} + +static void trace_module_remove_events(struct module *mod) +{ + struct ftrace_event_call *call, *p; + + list_for_each_entry_safe(call, p, &ftrace_events, list) { + if (call->mod == mod) { + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + } + if (call->event) + unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + } + } +} + +int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + mutex_lock(&event_mutex); + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_events(mod); + break; + case MODULE_STATE_GOING: + trace_module_remove_events(mod); + break; + } + mutex_unlock(&event_mutex); return 0; } +struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; + extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; -#define for_each_event(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - static __init int event_trace_init(void) { struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; + int ret; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -837,7 +897,7 @@ static __init int event_trace_init(void) if (!d_events) return 0; - for_each_event(call) { + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { /* The linker may leave blanks */ if (!call->name) continue; @@ -845,6 +905,10 @@ static __init int event_trace_init(void) event_create_dir(call, d_events); } + ret = register_module_notifier(&trace_module_nb); + if (!ret) + pr_warning("Failed to register trace events module notifier\n"); + return 0; } fs_initcall(event_trace_init); -- cgit v1.2.3-70-g09d2 From ecda8ae02a08ef065ff387f5cb2a2d4999da2408 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 18:49:38 -0400 Subject: tracing/events: fix lockdep system name Impact: fix compile error of lockdep event tracer Ingo Molnar pointed out that the system name for the lockdep tracer was "lock" which is used to include the event trace file name. It should be "lockdep" Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/trace/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 4d301e758de..45e326b5c7f 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -5,7 +5,7 @@ #include #undef TRACE_SYSTEM -#define TRACE_SYSTEM lock +#define TRACE_SYSTEM lockdep #ifdef CONFIG_LOCKDEP -- cgit v1.2.3-70-g09d2 From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 19:39:12 -0400 Subject: tracing/events: move trace point headers into include/trace/events Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/linux/kmemtrace.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/irq.h | 57 +++++++ include/trace/events/kmem.h | 194 ++++++++++++++++++++++ include/trace/events/lockdep.h | 60 +++++++ include/trace/events/sched.h | 339 ++++++++++++++++++++++++++++++++++++++ include/trace/events/skb.h | 40 +++++ include/trace/irq.h | 57 ------- include/trace/kmem.h | 194 ---------------------- include/trace/lockdep.h | 60 ------- include/trace/sched.h | 339 -------------------------------------- include/trace/skb.h | 40 ----- kernel/exit.c | 2 +- kernel/fork.c | 3 +- kernel/irq/handle.c | 2 +- kernel/kthread.c | 2 +- kernel/lockdep.c | 2 +- kernel/sched.c | 2 +- kernel/signal.c | 2 +- kernel/softirq.c | 2 +- kernel/trace/ftrace.c | 2 +- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- mm/util.c | 2 +- net/core/drop_monitor.c | 2 +- net/core/net-traces.c | 2 +- net/core/skbuff.c | 2 +- 27 files changed, 708 insertions(+), 707 deletions(-) create mode 100644 include/trace/events/irq.h create mode 100644 include/trace/events/kmem.h create mode 100644 include/trace/events/lockdep.h create mode 100644 include/trace/events/sched.h create mode 100644 include/trace/events/skb.h delete mode 100644 include/trace/irq.h delete mode 100644 include/trace/kmem.h delete mode 100644 include/trace/lockdep.h delete mode 100644 include/trace/sched.h delete mode 100644 include/trace/skb.h (limited to 'include') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 15c45a27a92..b616d3930c3 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -9,7 +9,7 @@ #ifdef __KERNEL__ -#include +#include #ifdef CONFIG_KMEMTRACE extern void kmemtrace_init(void); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 980eb66a6e3..18869417109 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -43,7 +43,7 @@ #endif #ifndef TRACE_INCLUDE_PATH -# define __TRACE_INCLUDE(system) +# define __TRACE_INCLUDE(system) # define UNDEF_TRACE_INCLUDE_FILE #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 00000000000..75e3468e449 --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 00000000000..c22c42f980b --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,194 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 00000000000..45e326b5c7f --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,60 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h new file mode 100644 index 00000000000..ffa1cab586b --- /dev/null +++ b/include/trace/events/sched.h @@ -0,0 +1,339 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 00000000000..1e8fabb57c0 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index 75e3468e449..00000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,57 +0,0 @@ -#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_IRQ_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#endif /* _TRACE_IRQ_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h deleted file mode 100644 index c22c42f980b..00000000000 --- a/include/trace/kmem.h +++ /dev/null @@ -1,194 +0,0 @@ -#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KMEM_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); -#endif /* _TRACE_KMEM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 45e326b5c7f..00000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,60 +0,0 @@ -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index ffa1cab586b..00000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,339 +0,0 @@ -#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SCHED_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#endif /* _TRACE_SCHED_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index 1e8fabb57c0..00000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,40 +0,0 @@ -#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SKB_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#endif /* _TRACE_SKB_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/exit.c b/kernel/exit.c index 2fe9d2c7eee..cab535c427b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 4bebf263923..085f73ebcea 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -71,6 +70,8 @@ #include #include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 983d8be8dff..37c63633e78 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -20,7 +20,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include #include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index e1c76924545..41c88fe4050 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #define KTHREAD_NICE_LEVEL (-5) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 257f21a76c5..47b201ecc6d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -48,7 +48,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/sched.c b/kernel/sched.c index e6d4518d47e..9f7ffd00b6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,7 +79,7 @@ #include "sched_cpupri.h" #define CREATE_TRACE_POINTS -#include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] diff --git a/kernel/signal.c b/kernel/signal.c index 1d5703ff003..94ec0a4dde0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/softirq.c b/kernel/softirq.c index a2d9b458ac2..7ab9dfd8d08 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e6a0b5c994..a2348898858 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9d8cccdfaa0..a98106dd979 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5bc00e8f153..b8b13c5540f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/mm/util.c b/mm/util.c index 0e74a22791c..6794a336e9a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,7 +7,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include /** * kstrdup - allocate space for and copy an existing string diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9fd0dc3cca9..b75b6cea49d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 80177205947..499a67eaf3a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -24,6 +24,6 @@ #include #define CREATE_TRACE_POINTS -#include +#include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f7..12806b84445 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include "kmap_skb.h" -- cgit v1.2.3-70-g09d2 From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 20:17:16 +0200 Subject: rculist: use list_entry_rcu in places where it's appropriate Use previously introduced list_entry_rcu instead of an open-coded list_entry + rcu_dereference combination. Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++++--- ipc/sem.c | 4 ++-- security/integrity/ima/ima_fs.c | 4 ++-- security/smack/smackfs.c | 8 ++++---- 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..886df41e745 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include #include #include +#include #include #include @@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) diff --git a/ipc/sem.c b/ipc/sem.c index 16a2189e96f..87c2b641fd7 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk) int i; rcu_read_lock(); - un = list_entry(rcu_dereference(ulp->list_proc.next), - struct sem_undo, list_proc); + un = list_entry_rcu(ulp->list_proc.next, + struct sem_undo, list_proc); if (&un->list_proc == &ulp->list_proc) semid = -1; else diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ffbe259700b..510186f0b72 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry(rcu_dereference(qe->later.next), - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, + struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e03a7e19c73..11d2cb19d7a 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) return; } - m = list_entry(rcu_dereference(smk_netlbladdr_list.next), - struct smk_netlbladdr, list); + m = list_entry_rcu(smk_netlbladdr_list.next, + struct smk_netlbladdr, list); /* the comparison '>' is a bit hacky, but works */ if (new->smk_mask.s_addr > m->smk_mask.s_addr) { @@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) list_add_rcu(&new->list, &m->list); return; } - m_next = list_entry(rcu_dereference(m->list.next), - struct smk_netlbladdr, list); + m_next = list_entry_rcu(m->list.next, + struct smk_netlbladdr, list); if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { list_add_rcu(&new->list, &m->list); return; -- cgit v1.2.3-70-g09d2 From e6a1a89d572c31b62d6dcf11a371c7323852d9b2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 15 Apr 2009 18:22:41 +0900 Subject: dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries We use a static value for the number of dma_debug_entries. It can be overwritten by a kernel command line option. Some IOMMUs (e.g. GART) can't set an appropriate value by a kernel command line option because they can't know such value until they finish initializing up their hardware. This patch adds dma_debug_resize_entries() enables IOMMUs to adjust the number of dma_debug_entries anytime. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Cc: fujita.tomonori@lab.ntt.co.jp Cc: akpm@linux-foundation.org LKML-Reference: <20090415182234R.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/dma-debug.h | 7 +++++ lib/dma-debug.c | 72 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 28d53cb7b5a..171ad8aedc8 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern void dma_debug_init(u32 num_entries); +extern int dma_debug_resize_entries(u32 num_entries); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries) { } +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d3da7edc034..5d61019330c 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -85,6 +85,7 @@ static u32 show_num_errors = 1; static u32 num_free_entries; static u32 min_free_entries; +static u32 nr_total_entries; /* number of preallocated entries requested by kernel cmdline */ static u32 req_entries; @@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) put_hash_bucket(bucket, &flags); } +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + /* struct dma_entry allocator * * The next two functions implement the allocator for @@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) goto out; } - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); + entry = __dma_entry_alloc(); #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; @@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void) entry->stacktrace.skip = 2; save_stack_trace(&entry->stacktrace); #endif - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; out: spin_unlock_irqrestore(&free_entries_lock, flags); @@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry) spin_unlock_irqrestore(&free_entries_lock, flags); } +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_debug_resize_entries); + /* * DMA-API debugging init code * @@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries) return; } + nr_total_entries = num_free_entries; + printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); } -- cgit v1.2.3-70-g09d2 From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001 From: Shawn Du Date: Tue, 14 Apr 2009 13:58:56 +0800 Subject: blktrace: support per-partition tracing Though one can specify '-d /dev/sda1' when using blktrace, it still traces the whole sda. To support per-partition tracing, when we start tracing, we initialize bt->start_lba and bt->end_lba to the start and end sector of that partition. Note some actions are per device, thus we don't filter 0-sector events. The original patch and discussion can be found here: http://marc.info/?l=linux-btrace&m=122949374214540&w=2 Signed-off-by: Shawn Du Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Jens Axboe LKML-Reference: <49E42620.4050701@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/compat_ioctl.c | 2 +- drivers/scsi/sg.c | 1 + include/linux/blktrace_api.h | 24 +++++++++++++----------- kernel/trace/blktrace.c | 29 +++++++++++++++++++++-------- 4 files changed, 36 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46..f8c218cd08e 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 82312df9b0b..49c98730bb8 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), + NULL, (char *)arg); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889e92e..267edc4017e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); @@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) - +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* __KERNEL__ */ #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2b98195b338..e932654cf59 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; - if (sector < bt->start_lba || sector > bt->end_lba) + if (sector && (sector < bt->start_lba || sector > bt->end_lba)) return 1; if (bt->pid && pid != bt->pid) return 1; @@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; - if (unlikely(act_log_check(bt, what, sector, pid))) + if (act_log_check(bt, what, sector, pid)) return; cpu = raw_smp_processor_id(); @@ -407,11 +407,13 @@ static struct rchan_callbacks blk_relay_callbacks = { * Setup everything required to start tracing */ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct blk_user_trace_setup *buts) + struct block_device *bdev, + struct blk_user_trace_setup *buts) { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; int ret, i; + struct hd_struct *part = NULL; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -480,11 +482,21 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - bt->start_lba = buts->start_lba; - bt->end_lba = buts->end_lba; - if (!bt->end_lba) + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else bt->end_lba = -1ULL; + /* overwrite with user settings */ + if (buts->start_lba) + bt->start_lba = buts->start_lba; + if (buts->end_lba) + bt->end_lba = buts->end_lba; + bt->pid = buts->pid; bt->trace_state = Blktrace_setup; @@ -505,6 +517,7 @@ err: } int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; @@ -514,7 +527,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return -EFAULT; - ret = do_blk_trace_setup(q, name, dev, &buts); + ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; @@ -582,7 +595,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, arg); + ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; case BLKTRACESTART: start = 1; -- cgit v1.2.3-70-g09d2 From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 14:00:05 +0800 Subject: blktrace: add trace/ to /sys/block/sda Impact: allow ftrace-plugin blktrace to trace device-mapper devices To trace a single partition: # echo 1 > /sys/block/sda/sda1/enable To trace the whole sda instead: # echo 1 > /sys/block/sda/enable Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace can't be used to trace device-mapper devices. Now: # echo 1 > /sys/block/dm-0/trace/enable echo: write error: No such device or address # mount -t ext4 /dev/dm-0 /mnt # echo 1 > /sys/block/dm-0/trace/enable # echo blk > /debug/tracing/current_tracer Reported-by: Theodore Tso Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42665.6020506@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/blk-sysfs.c | 7 ++++++- include/linux/blktrace_api.h | 6 ++++++ kernel/trace/blktrace.c | 5 +++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36beff5c..8653d710b39 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 267edc4017e..62763c95285 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d1098988052..8e7c5da3a3e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1644,3 +1644,8 @@ out: return ret ? ret : count; } +int blk_trace_init_sysfs(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); +} + -- cgit v1.2.3-70-g09d2 From 76620aafd66f0004829764940c5466144969cffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Apr 2009 02:02:07 -0700 Subject: gro: New frags interface to avoid copying shinfo It turns out that copying a 16-byte area at ~800k times a second can be really expensive :) This patch redesigns the frags GRO interface to avoid copying that area twice. The two disciples of the frags interface have been converted. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/cxgb3/adapter.h | 2 +- drivers/net/cxgb3/sge.c | 53 +++++++++++++++++------------ drivers/net/sfc/rx.c | 26 ++++++++++----- include/linux/if_vlan.h | 6 ++-- include/linux/netdevice.h | 22 ++++++------ net/8021q/vlan_core.c | 4 +-- net/core/dev.c | 81 ++++++++++++++++++++------------------------- 7 files changed, 101 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 714df2b675e..322434ac42f 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -195,7 +195,7 @@ struct sge_qset { /* an SGE queue set */ struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct sge_txq txq[SGE_TXQ_PER_SET]; - struct napi_gro_fraginfo lro_frag_tbl; + int nomem; int lro_enabled; void *lro_va; struct net_device *netdev; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f339..73d569e758e 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q) q->txq_stopped = 0; q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ q->rx_reclaim_timer.function = NULL; - q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; + q->nomem = 0; + napi_free_frags(&q->napi); } @@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct sge_fl *fl, int len, int complete) { struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct sk_buff *skb = NULL; struct cpl_rx_pkt *cpl; - struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; - int nr_frags = qs->lro_frag_tbl.nr_frags; - int frag_len = qs->lro_frag_tbl.len; + struct skb_frag_struct *rx_frag; + int nr_frags; int offset = 0; - if (!nr_frags) { - offset = 2 + sizeof(struct cpl_rx_pkt); - qs->lro_va = cpl = sd->pg_chunk.va + 2; + if (!qs->nomem) { + skb = napi_get_frags(&qs->napi); + qs->nomem = !skb; } fl->credits--; - len -= offset; pci_dma_sync_single_for_cpu(adap->pdev, pci_unmap_addr(sd, dma_addr), fl->buf_size - SGE_PG_RSVD, @@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, fl->alloc_size, PCI_DMA_FROMDEVICE); + if (!skb) { + put_page(sd->pg_chunk.page); + if (complete) + qs->nomem = 0; + return; + } + + rx_frag = skb_shinfo(skb)->frags; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (!nr_frags) { + offset = 2 + sizeof(struct cpl_rx_pkt); + qs->lro_va = sd->pg_chunk.va + 2; + } + len -= offset; + prefetch(qs->lro_va); rx_frag += nr_frags; rx_frag->page = sd->pg_chunk.page; rx_frag->page_offset = sd->pg_chunk.offset + offset; rx_frag->size = len; - frag_len += len; - qs->lro_frag_tbl.nr_frags++; - qs->lro_frag_tbl.len = frag_len; + skb->len += len; + skb->data_len += len; + skb->truesize += len; + skb_shinfo(skb)->nr_frags++; if (!complete) return; - qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; cpl = qs->lro_va; if (unlikely(cpl->vlan_valid)) { @@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct vlan_group *grp = pi->vlan_grp; if (likely(grp != NULL)) { - vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), - &qs->lro_frag_tbl); - goto out; + vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan)); + return; } } - napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); - -out: - qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0; + napi_gro_frags(&qs->napi); } /** @@ -2300,8 +2313,6 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(&qs->lro_frag_tbl); - prefetch(addr); #if L1_CACHE_BYTES < 128 prefetch(addr + L1_CACHE_BYTES); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 66d7fe3db3e..01f9432c31e 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -450,17 +450,27 @@ static void efx_rx_packet_lro(struct efx_channel *channel, /* Pass the skb/page into the LRO engine */ if (rx_buf->page) { - struct napi_gro_fraginfo info; + struct sk_buff *skb = napi_get_frags(napi); - info.frags[0].page = rx_buf->page; - info.frags[0].page_offset = efx_rx_buf_offset(rx_buf); - info.frags[0].size = rx_buf->len; - info.nr_frags = 1; - info.ip_summed = CHECKSUM_UNNECESSARY; - info.len = rx_buf->len; + if (!skb) { + put_page(rx_buf->page); + goto out; + } + + skb_shinfo(skb)->frags[0].page = rx_buf->page; + skb_shinfo(skb)->frags[0].page_offset = + efx_rx_buf_offset(rx_buf); + skb_shinfo(skb)->frags[0].size = rx_buf->len; + skb_shinfo(skb)->nr_frags = 1; + + skb->len = rx_buf->len; + skb->data_len = rx_buf->len; + skb->truesize += rx_buf->len; + skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_frags(napi, &info); + napi_gro_frags(napi); +out: EFX_BUG_ON_PARANOID(rx_buf->skb); rx_buf->page = NULL; } else { diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e1ff5b14310..7ff9af1d0f0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -118,8 +118,7 @@ extern int vlan_hwaccel_do_receive(struct sk_buff *skb); extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, - struct napi_gro_fraginfo *info); + unsigned int vlan_tci); #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) @@ -154,8 +153,7 @@ static inline int vlan_gro_receive(struct napi_struct *napi, } static inline int vlan_gro_frags(struct napi_struct *napi, - struct vlan_group *grp, unsigned int vlan_tci, - struct napi_gro_fraginfo *info) + struct vlan_group *grp, unsigned int vlan_tci) { return NET_RX_DROP; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7783f4a75..54db3ebf219 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1047,14 +1047,6 @@ struct packet_type { struct list_head list; }; -struct napi_gro_fraginfo { - skb_frag_t frags[MAX_SKB_FRAGS]; - unsigned int nr_frags; - unsigned int ip_summed; - unsigned int len; - __wsum csum; -}; - #include #include @@ -1442,12 +1434,18 @@ extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); -extern struct sk_buff * napi_fraginfo_skb(struct napi_struct *napi, - struct napi_gro_fraginfo *info); +extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret); -extern int napi_gro_frags(struct napi_struct *napi, - struct napi_gro_fraginfo *info); +extern struct sk_buff * napi_frags_skb(struct napi_struct *napi); +extern int napi_gro_frags(struct napi_struct *napi); + +static inline void napi_free_frags(struct napi_struct *napi) +{ + kfree_skb(napi->skb); + napi->skb = NULL; +} + extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 654e45f5719..c1f51e4a01b 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -114,9 +114,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, EXPORT_SYMBOL(vlan_gro_receive); int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct napi_gro_fraginfo *info) + unsigned int vlan_tci) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); + struct sk_buff *skb = napi_frags_skb(napi); if (!skb) return NET_RX_DROP; diff --git a/net/core/dev.c b/net/core/dev.c index 91d792d17e0..619fa141b8f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2519,16 +2519,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) } EXPORT_SYMBOL(napi_reuse_skb); -struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, - struct napi_gro_fraginfo *info) +struct sk_buff *napi_get_frags(struct napi_struct *napi) { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - skb_frag_t *frag; - int i; - - napi->skb = NULL; if (!skb) { skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); @@ -2536,47 +2530,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, goto out; skb_reserve(skb, NET_IP_ALIGN); - } - - BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = &info->frags[info->nr_frags - 1]; - for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { - skb_fill_page_desc(skb, i, frag->page, frag->page_offset, - frag->size); - frag++; + napi->skb = skb; } - skb_shinfo(skb)->nr_frags = info->nr_frags; - - skb->data_len = info->len; - skb->len += info->len; - skb->truesize += info->len; - - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - eth = skb_gro_header(skb, sizeof(*eth)); - if (!eth) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } - - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - - skb->ip_summed = info->ip_summed; - skb->csum = info->csum; out: return skb; } -EXPORT_SYMBOL(napi_fraginfo_skb); +EXPORT_SYMBOL(napi_get_frags); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { @@ -2606,9 +2567,39 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) } EXPORT_SYMBOL(napi_frags_finish); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +struct sk_buff *napi_frags_skb(struct napi_struct *napi) +{ + struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + + napi->skb = NULL; + + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } + + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; + +out: + return skb; +} +EXPORT_SYMBOL(napi_frags_skb); + +int napi_gro_frags(struct napi_struct *napi) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); + struct sk_buff *skb = napi_frags_skb(napi); if (!skb) return NET_RX_DROP; @@ -2712,7 +2703,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); - kfree_skb(napi->skb); + napi_free_frags(napi); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; -- cgit v1.2.3-70-g09d2 From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 13:24:06 -0400 Subject: ftrace: use module notifier for function tracer The hooks in the module code for the function tracer must be called before any of that module code runs. The function tracer hooks modify the module (replacing calls to mcount to nops). If the code is executed while the change occurs, then the CPU can take a GPF. To handle the above with a bit of paranoia, I originally implemented the hooks as calls directly from the module code. After examining the notifier calls, it looks as though the start up notify is called before any of the module's code is executed. This makes the use of the notify safe with ftrace. Only the startup notify is required to be "safe". The shutdown simply removes the entries from the ftrace function list, and does not modify any code. This change has another benefit. It removes a issue with a reverse dependency in the mutexes of ftrace_lock and module_mutex. [ Impact: fix lock dependency bug, cleanup ] Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ---- include/linux/module.h | 4 +++ kernel/module.c | 19 ++++------- kernel/trace/ftrace.c | 90 +++++++++++++++++++++++++++++++++++--------------- 4 files changed, 75 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 53869bef610..97c83e1bc58 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* diff --git a/include/linux/module.h b/include/linux/module.h index 6155fa44168..a8f2c0aa4c3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -341,6 +341,10 @@ struct module struct ftrace_event_call *trace_events; unsigned int num_trace_events; #endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/kernel/module.c b/kernel/module.c index a0394706f10..2383e60fcf3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1490,9 +1490,6 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); - /* release any pointers to mcount in this module */ - ftrace_release(mod->module_core, mod->core_size); - /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); @@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int num_mcount; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - unsigned long *mseg; mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->trace_events), &mod->num_trace_events); #endif - +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) @@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod, dynamic_debug_setup(debug, num_debug); } - /* sechdrs[0].sh_size is always zero */ - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", - sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); - err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod, cleanup: kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); - ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a2348898858..5b606f45b6c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -void ftrace_release(void *start, unsigned long size) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = s + size; - - if (ftrace_disabled || !start) - return; - - mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { - /* - * rec->ip is changed in ftrace_free_rec() - * It should not between s and e if record was freed. - */ - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); - ftrace_free_rec(rec); - } - } while_for_each_ftrace_rec(); - mutex_unlock(&ftrace_lock); -} - static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod, return 0; } -void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +#ifdef CONFIG_MODULES +void ftrace_release(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long s = (unsigned long)start; + unsigned long e = (unsigned long)end; + + if (ftrace_disabled || !start || start == end) + return; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + if ((rec->ip >= s) && (rec->ip < e)) { + /* + * rec->ip is changed in ftrace_free_rec() + * It should not between s and e if record was freed. + */ + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); + ftrace_free_rec(rec); + } + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; ftrace_convert_nops(mod, start, end); } +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + case MODULE_STATE_GOING: + ftrace_release(mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + } + + return 0; +} +#else +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +struct notifier_block ftrace_module_nb = { + .notifier_call = ftrace_module_notify, + .priority = 0, +}; + extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; @@ -2791,6 +2825,10 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); + ret = register_module_notifier(&ftrace_module_nb); + if (!ret) + pr_warning("Failed to register trace ftrace module notifier\n"); + return; failed: ftrace_disabled = 1; -- cgit v1.2.3-70-g09d2 From d1b182a8d49ed6416325b4e0a1cb0f17cd4e702a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 16:53:47 -0400 Subject: tracing/events/ring-buffer: expose format of ring buffer headers to users Currently, every thing needed to read the binary output from the ring buffers is available, with the exception of the way the ring buffers handles itself internally. This patch creates two special files in the debugfs/tracing/events directory: # cat /debug/tracing/events/header_page field: u64 timestamp; offset:0; size:8; field: local_t commit; offset:8; size:8; field: char data; offset:16; size:4080; # cat /debug/tracing/events/header_event type : 2 bits len : 3 bits time_delta : 27 bits array : 32 bits padding : type == 0 time_extend : type == 1 data : type == 3 This is to allow a userspace app to see if the ring buffer format changes or not. [ Impact: allow userspace apps to know of ringbuffer format changes ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 5 +++++ kernel/trace/ring_buffer.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f0aa486d131..fac8f1ac6f4 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -166,6 +166,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); +struct trace_seq; + +int ring_buffer_print_entry_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_seq *s); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f935bd5ec3e..84a6055f37c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -21,6 +21,28 @@ #include "trace.h" +/* + * The ring buffer header is special. We must manually up keep it. + */ +int ring_buffer_print_entry_header(struct trace_seq *s) +{ + int ret; + + ret = trace_seq_printf(s, "\ttype : 2 bits\n"); + ret = trace_seq_printf(s, "\tlen : 3 bits\n"); + ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); + ret = trace_seq_printf(s, "\tarray : 32 bits\n"); + ret = trace_seq_printf(s, "\n"); + ret = trace_seq_printf(s, "\tpadding : type == %d\n", + RINGBUF_TYPE_PADDING); + ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", + RINGBUF_TYPE_TIME_EXTEND); + ret = trace_seq_printf(s, "\tdata : type == %d\n", + RINGBUF_TYPE_DATA); + + return ret; +} + /* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is @@ -340,6 +362,28 @@ static inline int test_time_stamp(u64 delta) #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) +int ring_buffer_print_page_header(struct trace_seq *s) +{ + struct buffer_data_page field; + int ret; + + ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" + "offset:0;\tsize:%u;\n", + (unsigned int)sizeof(field.time_stamp)); + + ret = trace_seq_printf(s, "\tfield: local_t commit;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + (unsigned int)sizeof(field.commit)); + + ret = trace_seq_printf(s, "\tfield: char data;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), data), + (unsigned int)BUF_PAGE_SIZE); + + return ret; +} + /* * head_page == tail_page && head == tail then buffer is empty. */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f81d6eec4e4..7163a2bb021 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -610,6 +610,30 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + int (*func)(struct trace_seq *s) = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + func(s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + + kfree(s); + + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -667,6 +691,11 @@ static const struct file_operations ftrace_subsystem_filter_fops = { .write = subsystem_filter_write, }; +static const struct file_operations ftrace_show_header_fops = { + .open = tracing_open_generic, + .read = show_header, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -909,6 +938,15 @@ static __init int event_trace_init(void) if (!d_events) return 0; + /* ring buffer internal formats */ + trace_create_file("header_page", 0444, d_events, + ring_buffer_print_page_header, + &ftrace_show_header_fops); + + trace_create_file("header_event", 0444, d_events, + ring_buffer_print_entry_header, + &ftrace_show_header_fops); + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { /* The linker may leave blanks */ if (!call->name) -- cgit v1.2.3-70-g09d2 From 76aa81118ddfbb3dc31533030cf3ec329dd067a6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 16 Apr 2009 23:35:39 -0700 Subject: tracing: avoid warnings from zero-arg tracepoints Tracepoints with no arguments can issue two warnings: "field" defined by not used "ret" is uninitialized in this function Mark field as being OK to leave unused, and initialize ret. [ Impact: fix false positive compiler warnings. ] Signed-off-by: Jeremy Fitzhardinge Acked-by: Steven Rostedt Cc: mathieu.desnoyers@polymtl.ca LKML-Reference: <1239950139-1119-5-git-send-email-jeremy@goop.org> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 60c5323bee6..39a3351f2e7 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -160,8 +160,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ static int \ ftrace_format_##call(struct trace_seq *s) \ { \ - struct ftrace_raw_##call field; \ - int ret; \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ \ tstruct; \ \ -- cgit v1.2.3-70-g09d2 From 46de405f25f1d9fa73b657ffbb752aa0cc87a91d Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 17 Apr 2009 10:53:43 +0800 Subject: tracing: Remove include/trace/kmem_event_types.h kmem_event_types.h is no longer necessary since tracepoint definitions are put into include/trace/events/kmem.h [ Impact: remove now-unused file. ] Signed-off-by: Zhao Lei Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49E7EF37.2080205@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/kmem_event_types.h | 193 --------------------------------------- 1 file changed, 193 deletions(-) delete mode 100644 include/trace/kmem_event_types.h (limited to 'include') diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h deleted file mode 100644 index 4ff420fe467..00000000000 --- a/include/trace/kmem_event_types.h +++ /dev/null @@ -1,193 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -#undef TRACE_SYSTEM -- cgit v1.2.3-70-g09d2 From b0afdc126d0515e76890f0a5f26b28501cfa298e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 13:02:22 -0400 Subject: tracing/events: enable code with EVENT_TRACING not EVENT_TRACER The CONFIG_EVENT_TRACER is the way to turn on event tracing when no other tracing has been configured. All code to get enabled should depend on CONFIG_EVENT_TRACING. That is what is enabled when TRACING (or CONFIG_EVENT_TRACER) is selected. This patch enables the include/trace/ftrace.h file when CONFIG_EVENT_TRACING is enabled. [ Impact: fix warning in event tracer selftest ] Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 18869417109..7f1f23d601e 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,7 +56,7 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #include #endif -- cgit v1.2.3-70-g09d2 From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Apr 2009 21:41:52 -0400 Subject: tracing: add same level recursion detection The tracing infrastructure allows for recursion. That is, an interrupt may interrupt the act of tracing an event, and that interrupt may very well perform its own trace. This is a recursive trace, and is fine to do. The problem arises when there is a bug, and the utility doing the trace calls something that recurses back into the tracer. This recursion is not caused by an external event like an interrupt, but by code that is not expected to recurse. The result could be a lockup. This patch adds a bitmask to the task structure that keeps track of the trace recursion. To find the interrupt depth, the following algorithm is used: level = hardirq_count() + softirq_count() + in_nmi; Here, level will be the depth of interrutps and softirqs, and even handles the nmi. Then the corresponding bit is set in the recursion bitmask. If the bit was already set, we know we had a recursion at the same level and we warn about it and fail the writing to the buffer. After the data has been committed to the buffer, we clear the bit. No atomics are needed. The only races are with interrupts and they reset the bitmask before returning anywy. [ Impact: detect same irq level trace recursion ] Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 +++++++ include/linux/init_task.h | 1 + include/linux/sched.h | 4 +++- kernel/trace/ring_buffer.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 97c83e1bc58..39b95c56587 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -488,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) extern int ftrace_dump_on_oops; +#ifdef CONFIG_PREEMPT +#define INIT_TRACE_RECURSION .trace_recursion = 0, +#endif + #endif /* CONFIG_TRACING */ +#ifndef INIT_TRACE_RECURSION +#define INIT_TRACE_RECURSION +#endif #ifdef CONFIG_HW_BRANCH_TRACER diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfb93337e9..6fc21852986 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -187,6 +187,7 @@ extern struct cred init_cred; INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ } diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..7ede5e49091 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1428,7 +1428,9 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; -#endif + /* bitmask of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 84a6055f37c..b421b0ea911 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1481,6 +1481,40 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, return event; } +static int trace_irq_level(void) +{ + return hardirq_count() + softirq_count() + in_nmi(); +} + +static int trace_recursive_lock(void) +{ + int level; + + level = trace_irq_level(); + + if (unlikely(current->trace_recursion & (1 << level))) { + /* Disable all tracing before we do anything else */ + tracing_off_permanent(); + WARN_ON_ONCE(1); + return -1; + } + + current->trace_recursion |= 1 << level; + + return 0; +} + +static void trace_recursive_unlock(void) +{ + int level; + + level = trace_irq_level(); + + WARN_ON_ONCE(!current->trace_recursion & (1 << level)); + + current->trace_recursion &= ~(1 << level); +} + static DEFINE_PER_CPU(int, rb_need_resched); /** @@ -1514,6 +1548,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) /* If we are tracing schedule, we don't want to recurse */ resched = ftrace_preempt_disable(); + if (trace_recursive_lock()) + goto out_nocheck; + cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) @@ -1543,6 +1580,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) return event; out: + trace_recursive_unlock(); + + out_nocheck: ftrace_preempt_enable(resched); return NULL; } @@ -1581,6 +1621,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, rb_commit(cpu_buffer, event); + trace_recursive_unlock(); + /* * Only the last preempt count needs to restore preemption. */ -- cgit v1.2.3-70-g09d2 From bd3ce6556072bdc8ea66dfd5448e184f189bdc7f Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 17 Apr 2009 20:12:35 -0700 Subject: Input: rotary_encoder - add support for REL_* axes The rotary encoder driver only supports returning input events for ABS_* axes, this adds support for REL_* axes. The relative axis input event is reported as -1 for each counter-clockwise step and +1 for each clockwise step. The ability to clamp the position of ABS_* axes between 0 and a maximum of "steps" has also been added. Signed-off-by: H Hartley Sweeten Signed-off-by: Daniel Mack Signed-off-by: Dmitry Torokhov --- Documentation/input/rotary-encoder.txt | 9 ++++- drivers/input/misc/rotary_encoder.c | 61 +++++++++++++++++++++++----------- include/linux/rotary_encoder.h | 2 ++ 3 files changed, 51 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt index 435102a26d9..3a6aec40c0b 100644 --- a/Documentation/input/rotary-encoder.txt +++ b/Documentation/input/rotary-encoder.txt @@ -67,7 +67,12 @@ data with it. struct rotary_encoder_platform_data is declared in include/linux/rotary-encoder.h and needs to be filled with the number of steps the encoder has and can carry information about externally inverted -signals (because of used invertig buffer or other reasons). +signals (because of an inverting buffer or other reasons). The encoder +can be set up to deliver input information as either an absolute or relative +axes. For relative axes the input event returns +/-1 for each step. For +absolute axes the position of the encoder can either roll over between zero +and the number of steps or will clamp at the maximum and zero depending on +the configuration. Because GPIO to IRQ mapping is platform specific, this information must be given in seperately to the driver. See the example below. @@ -85,6 +90,8 @@ be given in seperately to the driver. See the example below. static struct rotary_encoder_platform_data my_rotary_encoder_info = { .steps = 24, .axis = ABS_X, + .relative_axis = false, + .rollover = false, .gpio_a = GPIO_ROTARY_A, .gpio_b = GPIO_ROTARY_B, .inverted_a = 0, diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index 5bb3ab51b8c..c806fbf1e17 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -26,13 +26,17 @@ #define DRV_NAME "rotary-encoder" struct rotary_encoder { - unsigned int irq_a; - unsigned int irq_b; - unsigned int pos; - unsigned int armed; - unsigned int dir; struct input_dev *input; struct rotary_encoder_platform_data *pdata; + + unsigned int axis; + unsigned int pos; + + unsigned int irq_a; + unsigned int irq_b; + + bool armed; + unsigned char dir; /* 0 - clockwise, 1 - CCW */ }; static irqreturn_t rotary_encoder_irq(int irq, void *dev_id) @@ -53,21 +57,32 @@ static irqreturn_t rotary_encoder_irq(int irq, void *dev_id) if (!encoder->armed) break; - if (encoder->dir) { - /* turning counter-clockwise */ - encoder->pos += pdata->steps; - encoder->pos--; - encoder->pos %= pdata->steps; + if (pdata->relative_axis) { + input_report_rel(encoder->input, pdata->axis, + encoder->dir ? -1 : 1); } else { - /* turning clockwise */ - encoder->pos++; - encoder->pos %= pdata->steps; + unsigned int pos = encoder->pos; + + if (encoder->dir) { + /* turning counter-clockwise */ + if (pdata->rollover) + pos += pdata->steps; + if (pos) + pos--; + } else { + /* turning clockwise */ + if (pdata->rollover || pos < pdata->steps) + pos++; + } + if (pdata->rollover) + pos %= pdata->steps; + encoder->pos = pos; + input_report_abs(encoder->input, pdata->axis, + encoder->pos); } - - input_report_abs(encoder->input, pdata->axis, encoder->pos); input_sync(encoder->input); - encoder->armed = 0; + encoder->armed = false; break; case 0x1: @@ -77,7 +92,7 @@ static irqreturn_t rotary_encoder_irq(int irq, void *dev_id) break; case 0x3: - encoder->armed = 1; + encoder->armed = true; break; } @@ -113,9 +128,15 @@ static int __devinit rotary_encoder_probe(struct platform_device *pdev) input->name = pdev->name; input->id.bustype = BUS_HOST; input->dev.parent = &pdev->dev; - input->evbit[0] = BIT_MASK(EV_ABS); - input_set_abs_params(encoder->input, - pdata->axis, 0, pdata->steps, 0, 1); + + if (pdata->relative_axis) { + input->evbit[0] = BIT_MASK(EV_REL); + input->relbit[0] = BIT_MASK(pdata->axis); + } else { + input->evbit[0] = BIT_MASK(EV_ABS); + input_set_abs_params(encoder->input, + pdata->axis, 0, pdata->steps, 0, 1); + } err = input_register_device(input); if (err) { diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index 12d63a30c34..215278b8df2 100644 --- a/include/linux/rotary_encoder.h +++ b/include/linux/rotary_encoder.h @@ -8,6 +8,8 @@ struct rotary_encoder_platform_data { unsigned int gpio_b; unsigned int inverted_a; unsigned int inverted_b; + bool relative_axis; + bool rollover; }; #endif /* __ROTARY_ENCODER_H__ */ -- cgit v1.2.3-70-g09d2 From 8e668b5b3455207e4540fc7ccab9ecf70142f288 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 17:17:55 -0400 Subject: tracing: remove format attribute of inline function Due to a cut and paste error, I added the gcc attribute for printf format to the static inline stub of trace_seq_printf. This will cause a compile failure. [ Impact: fix compiler error when CONFIG_TRACING is off ] Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: =?ISO-8859-15?Q?Fr=E9d=E9ric_Weisbecker?= LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/trace_seq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 15ca2c71af1..37db9bdfbc1 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -42,7 +42,6 @@ extern int trace_seq_path(struct trace_seq *s, struct path *path); #else /* CONFIG_TRACING */ static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))) { return 0; } -- cgit v1.2.3-70-g09d2 From 46a802e852c2106d755f697819ea80cd3ffdc222 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078..846a1e13240 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v1.2.3-70-g09d2 From a1df5169f9bf08f6067029bfb840a05e282b1b97 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ide.h | 11 +++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2894577237b..c6e03485a63 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + sense_rq->rq_disk = rq->rq_disk; + + sense_rq->data = sense; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->data_len = sense_len; + + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +void ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + BUG_ON(!drive->sense_rq_armed); + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. * We queue a request sense packet command in the head of the request list. diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e13240..a69ccac5641 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v1.2.3-70-g09d2 From 6b544fcc8cd0a04eb42de9d1ecdd345e979d6ada Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 48 ++++++++++++++---------------------------------- drivers/ide/ide-floppy.c | 4 +++- drivers/ide/ide-tape.c | 7 +++++-- include/linux/ide.h | 3 +-- 4 files changed, 23 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index c6e03485a63..972c522516f 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -79,34 +79,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_init_pc); -/* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->special = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - /* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. @@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq); /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = sense_rq->data; + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + ide_queue_sense_rq(drive, pc); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 94600331a27..d3302cc891e 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { + } else if (blk_special_request(rq) || blk_sense_request(rq)) { pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; @@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index aadf53cfac6..8324dfa78a3 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac5641..9e67ccac3c1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v1.2.3-70-g09d2 From 5c4be57249e2e09136446597d2fe2a967c6ffef0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 52 +++++++++++++++++++++++++++++-------------------- drivers/ide/ide-cd.c | 28 +++++++++++++------------- drivers/ide/ide-io.c | 3 +++ drivers/ide/ide-tape.c | 3 +++ include/linux/ide.h | 2 +- 5 files changed, 52 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 972c522516f..5cefe12f562 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) struct request_sense *sense = &drive->sense_data; struct request *sense_rq = &drive->sense_rq; unsigned int cmd_len, sense_len; + int err; debug_log("%s: enter\n", __func__); @@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); - sense_rq->rq_disk = rq->rq_disk; - sense_rq->data = sense; + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; - sense_rq->data_len = sense_len; - sense_rq->cmd_type = REQ_TYPE_SENSE; sense_rq->cmd_flags |= REQ_PREEMPT; @@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) } EXPORT_SYMBOL_GPL(ide_prep_sense); -void ide_queue_sense_rq(ide_drive_t *drive, void *special) +int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - BUG_ON(!drive->sense_rq_armed); + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } drive->sense_rq.special = special; drive->sense_rq_armed = false; @@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special) elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT, 0); + return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive) /* init pc from sense_rq */ ide_init_pc(pc); memcpy(pc->c, sense_rq->cmd, 12); - pc->buf = sense_rq->data; + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ pc->req_xfer = sense_rq->data_len; if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_sense_rq(drive, pc); + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) + if (drive->media == ide_tape && !rq->bio) done = ide_rq_bytes(rq); /* FIXME */ else done = blk_rq_bytes(rq); @@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { + if (drive->media == ide_tape && pc->bh) + done = drive->pc_io_buffers(drive, pc, bcount, write); + else { done = min_t(unsigned int, bcount, cmd->nleft); ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); } /* Update the current position */ diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 7b21c7eac5b..392a5bdf2fd 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* * For REQ_TYPE_SENSE, "rq->special" points to the original - * failed request + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ struct request *failed = (struct request *)rq->special; - struct request_sense *sense = &drive->sense_data; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { @@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - ide_queue_sense_rq(drive, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -412,8 +414,7 @@ end_request: hwif->rq = NULL; - ide_queue_sense_rq(drive, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 9b9e8b1aae5..3245c2dbda3 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8324dfa78a3..9b762a2d5d9 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -850,6 +850,9 @@ out: cmd.rq = rq; + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); + return ide_tape_issue_pc(drive, &cmd, pc); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67ccac3c1..1957461ac76 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v1.2.3-70-g09d2 From 6d7003877c2f0578f1c08f66d05c3f72ef4ae596 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 17 ++++------------- drivers/ide/ide-tape.c | 1 - include/linux/ide.h | 12 ------------ 3 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index b9dd4503cbc..afe5a432387 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (drive->media == ide_tape && pc->bh) - done = drive->pc_io_buffers(drive, pc, bcount, write); - else { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2599579e417..8dfc68892d6 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461ac76..34c128f0a33 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v1.2.3-70-g09d2 From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:14 +0800 Subject: x86, intr-remap: enable interrupt remapping early Currently, when x2apic is not enabled, interrupt remapping will be enabled in init_dmars(), where it is too late to remap ioapic interrupts, that is, ioapic interrupts are really in compatibility mode, not remappable mode. This patch always enables interrupt remapping before ioapic setup, it guarantees all interrupts will be remapped when interrupt remapping is enabled. Thus it doesn't need to set the compatibility interrupt bit. [ Impact: refactor intr-remap init sequence, enable fuller remap mode ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 7 ++-- arch/x86/kernel/apic/apic.c | 76 +++++++++++++++++++++----------------------- drivers/pci/intel-iommu.c | 9 ------ drivers/pci/intr_remapping.c | 28 ++++++++-------- include/linux/dmar.h | 1 + 5 files changed, 54 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index d4cb7e590c0..fbdd65446c7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -169,7 +169,6 @@ static inline u64 native_x2apic_icr_read(void) extern int x2apic, x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); -extern void enable_IR_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); static inline int x2apic_enabled(void) { @@ -190,18 +189,18 @@ static inline void check_x2apic(void) static inline void enable_x2apic(void) { } -static inline void enable_IR_x2apic(void) -{ -} static inline int x2apic_enabled(void) { return 0; } #define x2apic 0 +#define x2apic_preenabled 0 #endif +extern void enable_IR_x2apic(void); + extern int get_physical_broadcast(void); extern void apic_disable(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 83e47febcc8..0cf1eea750c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -141,6 +141,8 @@ static int x2apic_preenabled; static int disable_x2apic; static __init int setup_nox2apic(char *str) { + if (x2apic_enabled()) + panic("Bios already enabled x2apic, can't enforce nox2apic"); disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; @@ -1345,6 +1347,7 @@ void enable_x2apic(void) wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } +#endif /* CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) { @@ -1353,32 +1356,21 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; + ret = dmar_table_init(); + if (ret) { + pr_debug("dmar_table_init() failed with %d:\n", ret); + goto ir_failed; } - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; + if (!intr_remapping_supported()) { + pr_debug("intr-remapping not supported\n"); + goto ir_failed; } - ret = dmar_table_init(); - if (ret) { - pr_info("dmar_table_init() failed with %d:\n", ret); - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - pr_info("Not enabling x2apic,Intr-remapping\n"); + if (!x2apic_preenabled && skip_ioapic_setup) { + pr_info("Skipped enabling intr-remap because of skipping " + "io-apic setup\n"); return; } @@ -1398,20 +1390,25 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } +#ifdef CONFIG_X86_X2APIC + if (cpu_has_x2apic) + ret = enable_intr_remapping(EIM_32BIT_APIC_ID); + else +#endif + ret = enable_intr_remapping(EIM_8BIT_APIC_ID); if (ret) goto end_restore; - if (!x2apic) { + pr_info("Enabled Interrupt-remapping\n"); + +#ifdef CONFIG_X86_X2APIC + if (cpu_has_x2apic && !x2apic) { x2apic = 1; enable_x2apic(); + pr_info("Enabled x2apic\n"); } +#endif end_restore: if (ret) @@ -1426,30 +1423,29 @@ end_restore: local_irq_restore(flags); end: - if (!ret) { - if (!x2apic_preenabled) - pr_info("Enabled x2apic and interrupt-remapping\n"); - else - pr_info("Enabled Interrupt-remapping\n"); - } else - pr_err("Failed to enable Interrupt-remapping and x2apic\n"); if (ioapic_entries) free_ioapic_entries(ioapic_entries); + + if (!ret) + return; + +ir_failed: + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else if (cpu_has_x2apic) + pr_info("Not enabling x2apic,Intr-remapping\n"); #else if (!cpu_has_x2apic) return; if (x2apic_preenabled) panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); + " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); #endif return; } -#endif /* CONFIG_X86_X2APIC */ + #ifdef CONFIG_X86_64 /* diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 001b328adf8..9ce8f0764be 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1968,15 +1968,6 @@ static int __init init_dmars(void) } } -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) { - ret = enable_intr_remapping(0); - if (ret) - printk(KERN_ERR - "IOMMU: enable interrupt remapping failed\n"); - } -#endif - /* * For each rmrr * for each dev attached to rmrr diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6..5c2142656e9 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -423,20 +423,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) readl, (sts & DMA_GSTS_IRTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flags); - if (mode == 0) { - spin_lock_irqsave(&iommu->register_lock, flags); - - /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; - iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_CFIS), sts); - - spin_unlock_irqrestore(&iommu->register_lock, flags); - } - /* * global invalidation of interrupt entry cache before enabling * interrupt-remapping. @@ -516,6 +502,20 @@ end: spin_unlock_irqrestore(&iommu->register_lock, flags); } +int __init intr_remapping_supported(void) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + return 0; + } + + return 1; +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc342cd..06f592a7f73 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -108,6 +108,7 @@ struct irte { }; #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; +extern int intr_remapping_supported(void); extern int enable_intr_remapping(int); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); -- cgit v1.2.3-70-g09d2 From a0f82f64e26929776c58a5c93c2ecb38e3d82815 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 19 Apr 2009 09:43:48 +0000 Subject: syncookies: remove last_synq_overflow from struct tcp_sock last_synq_overflow eats 4 or 8 bytes in struct tcp_sock, even though it is only used when a listening sockets syn queue is full. We can (ab)use rx_opt.ts_recent_stamp to store the same information; it is not used otherwise as long as a socket is in listen state. Move linger2 around to avoid splitting struct mtu_probe across cacheline boundary on 32 bit arches. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 +--- include/net/tcp.h | 13 +++++++++++++ net/ipv4/syncookies.c | 5 ++--- net/ipv6/syncookies.c | 4 ++-- 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9d5078bd23a..8afac76cd74 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -377,7 +377,7 @@ struct tcp_sock { unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ - unsigned long last_synq_overflow; + int linger2; /* Receiver side RTT estimation */ struct { @@ -406,8 +406,6 @@ struct tcp_sock { /* TCP MD5 Signagure Option information */ struct tcp_md5sig_info *md5sig_info; #endif - - int linger2; }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1b94b9bfe2d..b55b4891029 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -265,6 +265,19 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num) atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); } +/* syncookies: remember time of last synqueue overflow */ +static inline void tcp_synq_overflow(struct sock *sk) +{ + tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; +} + +/* syncookies: no recent synqueue overflow on this listening socket? */ +static inline int tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT); +} + extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b35a950d2e0..cd2b97f1b6e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -161,13 +161,12 @@ static __u16 const msstab[] = { */ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) { - struct tcp_sock *tp = tcp_sk(sk); const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tp->last_synq_overflow = jiffies; + tcp_synq_overflow(sk); /* XXX sort msstab[] by probability? Binary search? */ for (mssind = 0; mss > msstab[mssind + 1]; mssind++) @@ -268,7 +267,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!sysctl_tcp_syncookies || !th->ack) goto out; - if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + if (tcp_synq_no_recent_overflow(sk) || (mss = cookie_check(skb, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 711175e0571..8c2513982b6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -131,7 +131,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) int mssind; const __u16 mss = *mssp; - tcp_sk(sk)->last_synq_overflow = jiffies; + tcp_synq_overflow(sk); for (mssind = 0; mss > msstab[mssind + 1]; mssind++) ; @@ -175,7 +175,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!sysctl_tcp_syncookies || !th->ack) goto out; - if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + if (tcp_synq_no_recent_overflow(sk) || (mss = cookie_check(skb, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; -- cgit v1.2.3-70-g09d2 From 23de29de2d8b227943be191d59fb6d983996d55e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 12:59:29 -0400 Subject: tracing: remove dangling semicolon Due to a cut and paste error, the trace_seq_putc had a semicolon after the prototype but before the stub function when tracing is disabled. [Impact: fix compile error ] Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 37db9bdfbc1..ba9627f00d3 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -63,7 +63,7 @@ static inline int trace_seq_puts(struct trace_seq *s, const char *str) { return 0; } -static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) { return 0; } -- cgit v1.2.3-70-g09d2 From b75576d76d4be50196773f36709cb7a4f5ac2ab7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Apr 2009 17:56:13 +0100 Subject: ASoC: Make the DAPM power check an operation on the widget Rather than having switch statements at point of use make the DAPM power check a member of the widget structure and set it when we instantiate the widget. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 ++ sound/soc/soc-dapm.c | 27 +++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index fcc929da033..839a97b6326 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -367,6 +367,8 @@ struct snd_soc_dapm_widget { unsigned char suspend:1; /* was active before suspend */ unsigned char pmdown:1; /* waiting for timeout */ + int (*power_check)(struct snd_soc_dapm_widget *w); + /* external events */ unsigned short event_flags; /* flags to specify event types */ int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 22522e2d83a..d3d17354e76 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -631,20 +631,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, { int power, ret; - /* Work out the new power state */ switch (w->id) { - case snd_soc_dapm_vmid: - /* No action required */ - return 0; - - case snd_soc_dapm_adc: - power = dapm_adc_check_power(w); - break; - - case snd_soc_dapm_dac: - power = dapm_dac_check_power(w); - break; - case snd_soc_dapm_pre: if (!w->event) return 0; @@ -680,10 +667,13 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, return 0; default: - power = dapm_generic_check_power(w); break; } + if (!w->power_check) + return 0; + + power = w->power_check(w); if (w->power == power) return 0; w->power = power; @@ -1147,15 +1137,22 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_switch: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + w->power_check = dapm_generic_check_power; dapm_new_mixer(codec, w); break; case snd_soc_dapm_mux: case snd_soc_dapm_value_mux: + w->power_check = dapm_generic_check_power; dapm_new_mux(codec, w); break; case snd_soc_dapm_adc: + w->power_check = dapm_adc_check_power; + break; case snd_soc_dapm_dac: + w->power_check = dapm_dac_check_power; + break; case snd_soc_dapm_pga: + w->power_check = dapm_generic_check_power; dapm_new_pga(codec, w); break; case snd_soc_dapm_input: @@ -1165,6 +1162,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_hp: case snd_soc_dapm_mic: case snd_soc_dapm_line: + w->power_check = dapm_generic_check_power; + break; case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: -- cgit v1.2.3-70-g09d2 From f1f9b3b1795da8625e0e6096813c9d18d4a344ce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 20:38:21 +0200 Subject: perfcounters, sched: remove __task_delta_exec() This function was left orphan by the latest round of sw-counter cleanups. [ Impact: remove unused kernel function ] Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 - kernel/sched.c | 23 ----------------------- 2 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 080d1fd461d..a77c6007dc9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,7 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t, cputime_t); diff --git a/kernel/sched.c b/kernel/sched.c index b66a08c2480..a69278eef42 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4546,29 +4546,6 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); -/* - * Return any ns on the sched_clock that have not yet been banked in - * @p in case that task is currently running. - */ -unsigned long long __task_delta_exec(struct task_struct *p, int update) -{ - s64 delta_exec; - struct rq *rq; - - rq = task_rq(p); - WARN_ON_ONCE(!runqueue_is_locked()); - WARN_ON_ONCE(!task_current(rq, p)); - - if (update) - update_rq_clock(rq); - - delta_exec = rq->clock - p->se.exec_start; - - WARN_ON_ONCE(delta_exec < 0); - - return delta_exec; -} - /* * Return any ns on the sched_clock that have not yet been banked in * @p in case that task is currently running. -- cgit v1.2.3-70-g09d2 From cd474f2d548af3c0eb932d9d47ec11483861aa6f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:08 +0200 Subject: ALSA: Remove deprecated snd_card_new() Signed-off-by: Takashi Iwai --- include/sound/core.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 3dea79829ac..a26bbdcc676 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -300,16 +300,6 @@ int snd_card_create(int idx, const char *id, struct module *module, int extra_size, struct snd_card **card_ret); -static inline __deprecated -struct snd_card *snd_card_new(int idx, const char *id, - struct module *module, int extra_size) -{ - struct snd_card *card; - if (snd_card_create(idx, id, module, extra_size, &card) < 0) - return NULL; - return card; -} - int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); -- cgit v1.2.3-70-g09d2 From ef9dfa4b1052af23a603de382d4665b2d1fccc61 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:41 +0200 Subject: ALSA: Remove deprecated include/sound/driver.h Signed-off-by: Takashi Iwai --- include/sound/driver.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/sound/driver.h (limited to 'include') diff --git a/include/sound/driver.h b/include/sound/driver.h deleted file mode 100644 index f0359437d01..00000000000 --- a/include/sound/driver.h +++ /dev/null @@ -1 +0,0 @@ -#warning "This file is deprecated" -- cgit v1.2.3-70-g09d2 From fc1edaf9e7cc4d4696f83dee495b8f158d01c4eb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:27 -0700 Subject: x86: x2apic, IR: Clean up X86_X2APIC and INTR_REMAP config checks Add x2apic_supported() to clean up CONFIG_X86_X2APIC checks. Fix CONFIG_INTR_REMAP checks. [ Impact: cleanup ] Signed-off-by: Suresh Siddha Cc: dwmw2@infradead.org Cc: Suresh Siddha Cc: Weidong Han LKML-Reference: <20090420200450.128993000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 10 ++++---- arch/x86/include/asm/io_apic.h | 2 -- arch/x86/include/asm/irq_remapping.h | 2 +- arch/x86/kernel/apic/apic.c | 49 +++++++++--------------------------- arch/x86/kernel/apic/io_apic.c | 2 -- arch/x86/kernel/apic/probe_64.c | 2 +- include/linux/dmar.h | 2 ++ 7 files changed, 21 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index fbdd65446c7..3738438a91f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); -#define EIM_8BIT_APIC_ID 0 -#define EIM_32BIT_APIC_ID 1 +extern int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* @@ -166,7 +165,7 @@ static inline u64 native_x2apic_icr_read(void) return val; } -extern int x2apic, x2apic_phys; +extern int x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); @@ -182,6 +181,8 @@ static inline int x2apic_enabled(void) return 1; return 0; } + +#define x2apic_supported() (cpu_has_x2apic) #else static inline void check_x2apic(void) { @@ -194,9 +195,8 @@ static inline int x2apic_enabled(void) return 0; } -#define x2apic 0 #define x2apic_preenabled 0 - +#define x2apic_supported() 0 #endif extern void enable_IR_x2apic(void); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e43601..34eaa37f7ad 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -161,7 +161,6 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); -#ifdef CONFIG_X86_64 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); @@ -169,7 +168,6 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void reinit_intr_remapped_IO_APIC(int intr_remapping, struct IO_APIC_route_entry **ioapic_entries); -#endif extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0396760fccb..f275e224450 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,6 +1,6 @@ #ifndef _ASM_X86_IRQ_REMAPPING_H #define _ASM_X86_IRQ_REMAPPING_H -#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) +#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7b41a32339e..2b30e520dce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -134,8 +134,8 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +int x2apic_mode; #ifdef CONFIG_X86_X2APIC -int x2apic; /* x2apic enabled before OS handover */ static int x2apic_preenabled; static int disable_x2apic; @@ -858,7 +858,7 @@ void clear_local_APIC(void) u32 v; /* APIC hasn't been mapped yet */ - if (!x2apic && !apic_phys) + if (!x2apic_mode && !apic_phys) return; maxlvt = lapic_get_maxlvt(); @@ -1330,7 +1330,7 @@ void check_x2apic(void) { if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; + x2apic_preenabled = x2apic_mode = 1; } } @@ -1338,7 +1338,7 @@ void enable_x2apic(void) { int msr, msr2; - if (!x2apic) + if (!x2apic_mode) return; rdmsr(MSR_IA32_APICBASE, msr, msr2); @@ -1390,25 +1390,17 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); -#ifdef CONFIG_X86_X2APIC - if (cpu_has_x2apic) - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - else -#endif - ret = enable_intr_remapping(EIM_8BIT_APIC_ID); - + ret = enable_intr_remapping(x2apic_supported()); if (ret) goto end_restore; pr_info("Enabled Interrupt-remapping\n"); -#ifdef CONFIG_X86_X2APIC - if (cpu_has_x2apic && !x2apic) { - x2apic = 1; + if (x2apic_supported() && !x2apic_mode) { + x2apic_mode = 1; enable_x2apic(); pr_info("Enabled x2apic\n"); } -#endif end_restore: if (ret) @@ -1576,7 +1568,7 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { - if (x2apic) { + if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); return; } @@ -2010,10 +2002,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); -#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) disable_intr_remapping(); -#endif + local_irq_restore(flags); return 0; } @@ -2023,8 +2015,6 @@ static int lapic_resume(struct sys_device *dev) unsigned int l, h; unsigned long flags; int maxlvt; - -#ifdef CONFIG_INTR_REMAP int ret; struct IO_APIC_route_entry **ioapic_entries = NULL; @@ -2050,17 +2040,8 @@ static int lapic_resume(struct sys_device *dev) mask_8259A(); } - if (x2apic) + if (x2apic_mode) enable_x2apic(); -#else - if (!apic_pm_state.active) - return 0; - - local_irq_save(flags); - if (x2apic) - enable_x2apic(); -#endif - else { /* * Make sure the APICBASE points to the right address @@ -2098,18 +2079,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { - if (x2apic) - reenable_intr_remapping(EIM_32BIT_APIC_ID); - else - reenable_intr_remapping(EIM_8BIT_APIC_ID); - + reenable_intr_remapping(x2apic_mode); unmask_8259A(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); } -#endif local_irq_restore(flags); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ea22a86e3cd..3a45d2ec974 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -736,7 +736,6 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_INTR_REMAP struct IO_APIC_route_entry **alloc_ioapic_entries(void) { int apic; @@ -857,7 +856,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) kfree(ioapic_entries); } -#endif /* * Find the IRQ entry number of a certain pin. diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1783652bb0e..bc3e880f9b8 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic && (apic != &apic_x2apic_phys && + if (x2apic_mode && (apic != &apic_x2apic_phys && #ifdef CONFIG_X86_UV apic != &apic_x2apic_uv_x && #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 06f592a7f73..10ff5c49882 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -158,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) } #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) +#define disable_intr_remapping() (0) +#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3-70-g09d2 From 03ad032bb78b2732b607ed198e951240e1d21e59 Mon Sep 17 00:00:00 2001 From: Peter Holik Date: Sat, 18 Apr 2009 07:24:17 +0000 Subject: export usbnet_get_ethernet_addr from usbnet and fixed cdc_ether.c because of using the same function get_ethernet_addr as cdc_ether.c i export usbnet_get_ethernet_addr from usbnet and fixed cdc_ether (suggested by Oliver Neukum). Signed-off-by: Peter Holik Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 33 +-------------------------------- drivers/net/usb/usbnet.c | 31 +++++++++++++++++++++++++++++++ include/linux/usb/usbnet.h | 1 + 3 files changed, 33 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 55e8ecc3a9e..01fd528306e 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -389,36 +388,6 @@ static void cdc_status(struct usbnet *dev, struct urb *urb) } } -static u8 nibble(unsigned char c) -{ - if (likely(isdigit(c))) - return c - '0'; - c = toupper(c); - if (likely(isxdigit(c))) - return 10 + c - 'A'; - return 0; -} - -static inline int -get_ethernet_addr(struct usbnet *dev, struct usb_cdc_ether_desc *e) -{ - int tmp, i; - unsigned char buf [13]; - - tmp = usb_string(dev->udev, e->iMACAddress, buf, sizeof buf); - if (tmp != 12) { - dev_dbg(&dev->udev->dev, - "bad MAC string %d fetch, %d\n", e->iMACAddress, tmp); - if (tmp >= 0) - tmp = -EINVAL; - return tmp; - } - for (i = tmp = 0; i < 6; i++, tmp += 2) - dev->net->dev_addr [i] = - (nibble(buf [tmp]) << 4) + nibble(buf [tmp + 1]); - return 0; -} - static int cdc_bind(struct usbnet *dev, struct usb_interface *intf) { int status; @@ -428,7 +397,7 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf) if (status < 0) return status; - status = get_ethernet_addr(dev, info->ether); + status = usbnet_get_ethernet_addr(dev, info->ether->iMACAddress); if (status < 0) { usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver_of(intf), info->data); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 2b8b9036aff..c94de624314 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,36 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usbnet_get_endpoints); +static u8 nibble(unsigned char c) +{ + if (likely(isdigit(c))) + return c - '0'; + c = toupper(c); + if (likely(isxdigit(c))) + return 10 + c - 'A'; + return 0; +} + +int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) +{ + int tmp, i; + unsigned char buf [13]; + + tmp = usb_string(dev->udev, iMACAddress, buf, sizeof buf); + if (tmp != 12) { + dev_dbg(&dev->udev->dev, + "bad MAC string %d fetch, %d\n", iMACAddress, tmp); + if (tmp >= 0) + tmp = -EINVAL; + return tmp; + } + for (i = tmp = 0; i < 6; i++, tmp += 2) + dev->net->dev_addr [i] = + (nibble(buf [tmp]) << 4) + nibble(buf [tmp + 1]); + return 0; +} +EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr); + static void intr_complete (struct urb *urb); static int init_status (struct usbnet *dev, struct usb_interface *intf) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 36fabb95c7d..5d44059f6d6 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -183,6 +183,7 @@ extern void usbnet_tx_timeout (struct net_device *net); extern int usbnet_change_mtu (struct net_device *net, int new_mtu); extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *); +extern int usbnet_get_ethernet_addr(struct usbnet *, int); extern void usbnet_defer_kevent (struct usbnet *, int); extern void usbnet_skb_return (struct usbnet *, struct sk_buff *); extern void usbnet_unlink_rx_urbs(struct usbnet *); -- cgit v1.2.3-70-g09d2 From b1b67dd45a6b629eb41553856805aaa1614fbb83 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 20 Apr 2009 04:49:28 +0000 Subject: net: factor out ethtool invocation of vlan/macvlan drivers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 22 +++------------------- include/linux/netdevice.h | 24 +++++++++++++++++++++++- net/8021q/vlan_dev.c | 23 +++-------------------- 3 files changed, 29 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 214a8cf2b70..329cd50d0e2 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -374,36 +374,20 @@ static void macvlan_ethtool_get_drvinfo(struct net_device *dev, static u32 macvlan_ethtool_get_rx_csum(struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); - struct net_device *lowerdev = vlan->lowerdev; - - if (lowerdev->ethtool_ops == NULL || - lowerdev->ethtool_ops->get_rx_csum == NULL) - return 0; - return lowerdev->ethtool_ops->get_rx_csum(lowerdev); + return dev_ethtool_get_rx_csum(vlan->lowerdev); } static int macvlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct macvlan_dev *vlan = netdev_priv(dev); - struct net_device *lowerdev = vlan->lowerdev; - - if (!lowerdev->ethtool_ops || - !lowerdev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - return lowerdev->ethtool_ops->get_settings(lowerdev, cmd); + return dev_ethtool_get_settings(vlan->lowerdev, cmd); } static u32 macvlan_ethtool_get_flags(struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); - struct net_device *lowerdev = vlan->lowerdev; - - if (!lowerdev->ethtool_ops || - !lowerdev->ethtool_ops->get_flags) - return 0; - return lowerdev->ethtool_ops->get_flags(lowerdev); + return dev_ethtool_get_flags(vlan->lowerdev); } static const struct ethtool_ops macvlan_ethtool_ops = { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 54db3ebf219..31167451d08 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,7 @@ #include #include +#include #include #include #ifdef CONFIG_DCB @@ -49,7 +50,6 @@ #endif struct vlan_group; -struct ethtool_ops; struct netpoll_info; /* 802.11 specific */ struct wireless_dev; @@ -1906,6 +1906,28 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) } extern struct pernet_operations __net_initdata loopback_net_ops; + +static inline int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + return dev->ethtool_ops->get_settings(dev, cmd); +} + +static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum) + return 0; + return dev->ethtool_ops->get_rx_csum(dev); +} + +static inline u32 dev_ethtool_get_flags(struct net_device *dev) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_flags) + return 0; + return dev->ethtool_ops->get_flags(dev); +} #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b092136401..04dc8c8a685 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -666,13 +666,7 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (!real_dev->ethtool_ops || - !real_dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - return real_dev->ethtool_ops->get_settings(real_dev, cmd); + return dev_ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -686,24 +680,13 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (real_dev->ethtool_ops == NULL || - real_dev->ethtool_ops->get_rx_csum == NULL) - return 0; - return real_dev->ethtool_ops->get_rx_csum(real_dev); + return dev_ethtool_get_rx_csum(vlan->real_dev); } static u32 vlan_ethtool_get_flags(struct net_device *dev) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (!(real_dev->features & NETIF_F_HW_VLAN_RX) || - real_dev->ethtool_ops == NULL || - real_dev->ethtool_ops->get_flags == NULL) - return 0; - return real_dev->ethtool_ops->get_flags(real_dev); + return dev_ethtool_get_flags(vlan->real_dev); } static const struct ethtool_ops vlan_ethtool_ops = { -- cgit v1.2.3-70-g09d2 From 0a1ec07a67bd8b0033dace237249654d015efa21 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 20 Apr 2009 01:25:46 +0000 Subject: net: skb_copy_datagram_const_iovec() There's an skb_copy_datagram_iovec() to copy out of a paged skb, but it modifies the iovec, and does not support starting at an offset in the destination. We want both in tun.c, so let's add the function. It's a carbon copy of skb_copy_datagram_iovec() with enough changes to be annoying. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++ include/linux/socket.h | 2 ++ net/core/datagram.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/core/iovec.c | 26 ++++++++++++++ 4 files changed, 125 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5fd389162f0..af2b21bdda8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1717,6 +1717,11 @@ extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, struct iovec *from, int len); +extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, + int offset, + const struct iovec *to, + int to_offset, + int size); extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); diff --git a/include/linux/socket.h b/include/linux/socket.h index 421afb4d29b..171b08db9c4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -318,6 +318,8 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); +extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, + int offset, int len); extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen); extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378..4dbb05cd572 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -338,6 +338,98 @@ fault: return -EFAULT; } +/** + * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. + * @skb: buffer to copy + * @offset: offset in the buffer to start copying from + * @to: io vector to copy to + * @to_offset: offset in the io vector to start copying to + * @len: amount of data to copy from buffer to iovec + * + * Returns 0 or -EFAULT. + * Note: the iovec is not modified during the copy. + */ +int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, + const struct iovec *to, int to_offset, + int len) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + + /* Copy header. */ + if (copy > 0) { + if (copy > len) + copy = len; + if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + WARN_ON(start > offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + int err; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + + if (copy > len) + copy = len; + vaddr = kmap(page); + err = memcpy_toiovecend(to, vaddr + frag->page_offset + + offset - start, to_offset, copy); + kunmap(page); + if (err) + goto fault; + if (!(len -= copy)) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + WARN_ON(start > offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_const_iovec(list, + offset - start, + to, to_offset, + copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + } + if (!len) + return 0; + +fault: + return -EFAULT; +} +EXPORT_SYMBOL(skb_copy_datagram_const_iovec); + /** * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. * @skb: buffer to copy diff --git a/net/core/iovec.c b/net/core/iovec.c index 4c9c0121c9d..a215545c0a3 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -97,6 +97,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) return 0; } +/* + * Copy kernel to iovec. Returns -EFAULT on error. + */ + +int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, + int offset, int len) +{ + int copy; + for (; len > 0; ++iov) { + /* Skip over the finished iovecs */ + if (unlikely(offset >= iov->iov_len)) { + offset -= iov->iov_len; + continue; + } + copy = min_t(unsigned int, iov->iov_len - offset, len); + offset = 0; + if (copy_to_user(iov->iov_base, kdata, copy)) + return -EFAULT; + kdata += copy; + len -= copy; + } + + return 0; +} + /* * Copy iovec to kernel. Returns -EFAULT on error. * @@ -236,3 +261,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(memcpy_fromiovec); EXPORT_SYMBOL(memcpy_fromiovecend); EXPORT_SYMBOL(memcpy_toiovec); +EXPORT_SYMBOL(memcpy_toiovecend); -- cgit v1.2.3-70-g09d2 From 6f26c9a7555e5bcca3560919db9b852015077dae Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 20 Apr 2009 01:26:11 +0000 Subject: tun: fix tun_chr_aio_write so that aio works aio_write gets const struct iovec * but tun_chr_aio_write casts this to struct iovec * and modifies the iovec. As a result, attempts to use io_submit to send packets to a tun device fail with weird errors such as EINVAL. Since tun is the only user of skb_copy_datagram_from_iovec, we can fix this simply by changing the later so that it does not touch the iovec passed to it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 13 ++++++++----- include/linux/skbuff.h | 3 ++- include/linux/socket.h | 4 ++-- net/core/datagram.c | 20 ++++++++++++++------ net/core/iovec.c | 7 ++++--- 5 files changed, 30 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3b513e29d39..589f0ca668d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -540,31 +540,34 @@ static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun, /* Get packet from user space buffer */ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, - struct iovec *iv, size_t count, + const struct iovec *iv, size_t count, int noblock) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; size_t len = count, align = 0; struct virtio_net_hdr gso = { 0 }; + int offset = 0; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) > count) return -EINVAL; - if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi))) + if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) return -EFAULT; + offset += sizeof(pi); } if (tun->flags & TUN_VNET_HDR) { if ((len -= sizeof(gso)) > count) return -EINVAL; - if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso))) + if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) return -EFAULT; if (gso.hdr_len > len) return -EINVAL; + offset += sizeof(pi); } if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) { @@ -581,7 +584,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, return PTR_ERR(skb); } - if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) { + if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) { tun->dev->stats.rx_dropped++; kfree_skb(skb); return -EFAULT; @@ -673,7 +676,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); - result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count), + result = tun_get_user(tun, iv, iov_length(iv, count), file->f_flags & O_NONBLOCK); tun_put(tun); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af2b21bdda8..1b5c3d298f4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1715,7 +1715,8 @@ extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, struct iovec *iov); extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - struct iovec *from, + const struct iovec *from, + int from_offset, int len); extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, diff --git a/include/linux/socket.h b/include/linux/socket.h index 171b08db9c4..42a0396f2c5 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -309,8 +309,8 @@ struct ucred { #ifdef __KERNEL__ extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); -extern int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, int len); +extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len); extern int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index 4dbb05cd572..914d5fa773b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -435,13 +435,15 @@ EXPORT_SYMBOL(skb_copy_datagram_const_iovec); * @skb: buffer to copy * @offset: offset in the buffer to start copying to * @from: io vector to copy to + * @from_offset: offset in the io vector to start copying from * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. - * Note: the iovec is modified during the copy. + * Note: the iovec is not modified during the copy. */ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - struct iovec *from, int len) + const struct iovec *from, int from_offset, + int len) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -450,11 +452,12 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (memcpy_fromiovec(skb->data + offset, from, copy)) + if (memcpy_fromiovecend(skb->data + offset, from, 0, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; + from_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -473,8 +476,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap(page); - err = memcpy_fromiovec(vaddr + frag->page_offset + - offset - start, from, copy); + err = memcpy_fromiovecend(vaddr + frag->page_offset + + offset - start, + from, from_offset, copy); kunmap(page); if (err) goto fault; @@ -482,6 +486,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (!(len -= copy)) return 0; offset += copy; + from_offset += copy; } start = end; } @@ -500,11 +505,14 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, copy = len; if (skb_copy_datagram_from_iovec(list, offset - start, - from, copy)) + from, + from_offset, + copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; + from_offset += copy; } start = end; } diff --git a/net/core/iovec.c b/net/core/iovec.c index a215545c0a3..40a76ce19d9 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -147,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) } /* - * For use with ip_build_xmit + * Copy iovec from kernel. Returns -EFAULT on error. */ -int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, - int len) + +int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) { /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { -- cgit v1.2.3-70-g09d2 From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Apr 2009 16:53:34 +0800 Subject: tracing/events: make struct trace_entry->type to be int type struct trace_entry->type is unsigned char, while trace event's id is int type, thus for a event with id >= 256, it's entry->type is cast to (id % 256), and then we can't see the trace output of this event. # insmod trace-events-sample.ko # echo foo_bar > /mnt/tracing/set_event # cat /debug/tracing/events/trace-events-sample/foo_bar/id 256 # cat /mnt/tracing/trace_pipe <...>-3548 [001] 215.091142: Unknown type 0 <...>-3548 [001] 216.089207: Unknown type 0 <...>-3548 [001] 217.087271: Unknown type 0 <...>-3548 [001] 218.085332: Unknown type 0 [ Impact: fix output for trace events with id >= 256 ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/trace/ftrace.h | 2 +- kernel/trace/trace.c | 4 ++-- kernel/trace/trace.h | 2 +- kernel/trace/trace_events.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 75f3ac01a87..2a4a4074991 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,7 +16,7 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - unsigned char type; + int type; unsigned char flags; unsigned char preempt_count; int pid; @@ -73,7 +73,7 @@ enum print_line_t { struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 39a3351f2e7..15ef08d9add 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -198,7 +198,7 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(unsigned char, type); \ + __common_field(int, type); \ __common_field(unsigned char, flags); \ __common_field(unsigned char, preempt_count); \ __common_field(int, pid); \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b9a3adce922..b6183bc9eca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,7 +838,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc) { @@ -881,7 +881,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr, } struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc) { return trace_buffer_lock_reserve(&global_trace, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 247948e81b0..7d55bcf50e4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -422,7 +422,7 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9ea55a7dfde..5d6e879cf87 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(unsigned char, type), + FIELD(int, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), -- cgit v1.2.3-70-g09d2 From 9cbf117662e24c6d33245666804487f92c21b59d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:51:29 +0200 Subject: tracing/events: provide string with undefined size support This patch provides the support for dynamic size strings on event tracing. The key concept is to use a structure with an ending char array field of undefined size and use such ability to allocate the minimal size on the ring buffer to make one or more string entries fit inside, as opposite to a fixed length strings with upper bound. The strings themselves are represented using fields which have an offset value from the beginning of the entry. This patch provides three new macros: __string(item, src) This one declares a string to the structure inside TP_STRUCT__entry. You need to provide the name of the string field and the source that will be copied inside. This will also add the dynamic size of the string needed for the ring buffer entry allocation. A stack allocated structure is used to temporarily store the offset of each strings, avoiding double calls to strlen() on each event insertion. __get_str(field) This one will give you a pointer to the string you have created. This is an abstract helper to resolve the absolute address given the field name which is a relative address from the beginning of the trace_structure. __assign_str(dst, src) Use this macro to automatically perform the string copy from src to dst. src must be a variable to assign and dst is the name of a __string field. Example on how to use it: TRACE_EVENT(my_event, TP_PROTO(char *src1, char *src2), TP_ARGS(src1, src2), TP_STRUCT__entry( __string(str1, src1) __string(str2, src2) ), TP_fast_assign( __assign_str(str1, src1); __assign_str(str2, src2); ), TP_printk("%s %s", __get_str(src1), __get_str(src2)) ) Of course you can mix-up any __field or __array inside this TRACE_EVENT. The position of the __string or __assign_str doesn't matter. Changes in v2: Address the suggestion of Steven Rostedt: drop the opening_string() macro and redefine __ending_string() to get the size of the string to be copied instead of overwritting the whole ring buffer allocation. Changes in v3: Address other suggestions of Steven Rostedt and Peter Zijlstra with some changes: drop the __ending_string and the need to have only one string field. Use offsets instead of absolute addresses. [ Impact: allow more compact memory usage for string tracing ] Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan Cc: Peter Zijlstra --- include/trace/ftrace.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 15ef08d9add..5a7d18c4363 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -27,6 +27,9 @@ #undef __field #define __field(type, item) type item; +#undef __string +#define __string(item, src) int __str_loc_##item; + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -35,14 +38,53 @@ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ + char __str_data[0]; \ }; \ static struct ftrace_event_call event_##name #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 2 of the trace events. * + * Include the following: + * + * struct ftrace_str_offsets_ { + * int ; + * int ; + * [...] + * }; + * + * The __string() macro will create each int , this is to + * keep the offset of each string from the beggining of the event + * once we perform the strlen() of the src strings. + * + */ + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) + +#undef __field +#define __field(type, item); + +#undef __string +#define __string(item, src) int item; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + struct ftrace_str_offsets_##call { \ + tstruct; \ + }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * * Override the macros in to include the following: * * enum print_line_t @@ -80,6 +122,9 @@ #undef TP_printk #define TP_printk(fmt, args...) fmt "\n", args +#undef __get_str +#define __get_str(field) (char *)__entry + __entry->__str_loc_##field + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ @@ -146,6 +191,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ if (!ret) \ return 0; +#undef __string +#define __string(item, src) \ + ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \ + "offset:%u;tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __str_loc_##item), \ + (unsigned int)sizeof(field.__str_loc_##item)); \ + if (!ret) \ + return 0; + #undef __entry #define __entry REC @@ -189,6 +244,12 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; +#undef __string +#define __string(item, src) \ + ret = trace_define_field(event_call, "__str_loc", #item, \ + offsetof(typeof(field), __str_loc_##item), \ + sizeof(field.__str_loc_##item)); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ @@ -212,7 +273,7 @@ ftrace_define_fields_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* - * Stage 3 of the trace events. + * Stage 4 of the trace events. * * Override the macros in to include the following: * @@ -409,6 +470,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __entry #define __entry entry +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __string +#define __string(item, src) \ + __str_offsets.item = __str_size + \ + offsetof(typeof(*entry), __str_data); \ + __str_size += strlen(src) + 1; + +#undef __assign_str +#define __assign_str(dst, src) \ + __entry->__str_loc_##dst = __str_offsets.dst; \ + strcpy(__get_str(dst), src); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ _TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ @@ -417,18 +495,22 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ + struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ struct ftrace_event_call *call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ + int __str_size = 0; \ int pc; \ \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ + tstruct; \ + \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ + sizeof(struct ftrace_raw_##call) + __str_size,\ + irq_flags, pc); \ if (!event) \ return; \ entry = ring_buffer_event_data(event); \ -- cgit v1.2.3-70-g09d2 From 7e7ca9a22dbbc5c91763cd16923c7509918709b6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:54:49 +0200 Subject: tracing/lock: provide lock_acquired event support for dynamic size string Now that we can support the dynamic sized string, make the lock tracing able to use it, making it safe against modules removal and consuming the right amount of memory needed for each lock name Changes in v2: adapt to the __ending_string() updates and the opening_string() removal. [ Impact: protect lock tracer against module removal ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Steven Rostedt --- include/trace/events/lockdep.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 45e326b5c7f..3ca315c1429 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -38,16 +38,16 @@ TRACE_EVENT(lock_acquired, TP_ARGS(lock, ip, waittime), TP_STRUCT__entry( - __field(const char *, name) + __string(name, lock->name) __field(unsigned long, wait_usec) __field(unsigned long, wait_nsec_rem) ), TP_fast_assign( - __entry->name = lock->name; + __assign_str(name, lock->name); __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); __entry->wait_usec = (unsigned long) waittime; ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, __entry->wait_nsec_rem) ); -- cgit v1.2.3-70-g09d2 From 6a74aa40907757ec98d8710ff66cd4cfe064e7d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Apr 2009 00:41:09 +0200 Subject: tracing/events: protect __get_str() The __get_str() macro is used in a code part then its content should be protected with parenthesis. [ Impact: make macro definition more robust ] Reported-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 5a7d18c4363..a77f71a46db 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -123,7 +123,7 @@ #define TP_printk(fmt, args...) fmt "\n", args #undef __get_str -#define __get_str(field) (char *)__entry + __entry->__str_loc_##field +#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -- cgit v1.2.3-70-g09d2 From 246d0a17f5e09af0794960164269fc8988a8811c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 18:24:55 +0100 Subject: ASoC: Add power supply widget to DAPM Many modern CODECs have shared resources on chip which must be enabled for portions of the chip to work but which can be disabled at other times in order to achieve power savings. Examples of such resources include power supplies and some internal clocks. Since these widgets are dependencies for the audio path but do not carry audio signals they require slightly different handling to most widgets - they do not contribute to the audio path and so should not be counted as either inputs or outputs during path walks. Cases where one supply provides a supply for another will require additional work. There is also room for more optimisation of the graph walking to avoid repeated checks for the same thing. Signed-off-by: Mark Brown --- Documentation/sound/alsa/soc/dapm.txt | 1 + include/sound/soc-dapm.h | 7 +++++- sound/soc/soc-dapm.c | 44 +++++++++++++++++++++++++++++++---- 3 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt index 9e6763264a2..9ac842be9b4 100644 --- a/Documentation/sound/alsa/soc/dapm.txt +++ b/Documentation/sound/alsa/soc/dapm.txt @@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:- o Mic - Mic (and optional Jack) o Line - Line Input/Output (and optional Jack) o Speaker - Speaker + o Supply - Power or clock supply widget used by other widgets. o Pre - Special PRE widget (exec before all others) o Post - Special POST widget (exec after all others) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 839a97b6326..533f9f25649 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -154,12 +154,16 @@ .shift = wshift, .invert = winvert, \ .event = wevent, .event_flags = wflags} -/* generic register modifier widget */ +/* generic widgets */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ { .id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \ .reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \ .on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD} +#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \ +{ .id = snd_soc_dapm_supply, .name = wname, .reg = wreg, \ + .shift = wshift, .invert = winvert, .event = wevent, \ + .event_flags = wflags} /* dapm kcontrol types */ #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \ @@ -308,6 +312,7 @@ enum snd_soc_dapm_type { snd_soc_dapm_vmid, /* codec bias/vmid - to minimise pops */ snd_soc_dapm_pre, /* machine specific pre widget - exec first */ snd_soc_dapm_post, /* machine specific post widget - exec last */ + snd_soc_dapm_supply, /* power/clock supply */ }; /* diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d3d17354e76..7847f80e96d 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -52,17 +52,19 @@ /* dapm power sequences - make this per codec in the future */ static int dapm_up_seq[] = { - snd_soc_dapm_pre, snd_soc_dapm_micbias, snd_soc_dapm_mic, - snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_dac, - snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_pga, - snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_post + snd_soc_dapm_pre, snd_soc_dapm_supply, snd_soc_dapm_micbias, + snd_soc_dapm_mic, snd_soc_dapm_mux, snd_soc_dapm_value_mux, + snd_soc_dapm_dac, snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, + snd_soc_dapm_pga, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, + snd_soc_dapm_post }; static int dapm_down_seq[] = { snd_soc_dapm_pre, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_pga, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_mixer, snd_soc_dapm_dac, snd_soc_dapm_mic, snd_soc_dapm_micbias, - snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_post + snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_supply, + snd_soc_dapm_post }; static int dapm_status = 1; @@ -165,6 +167,7 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w, case snd_soc_dapm_dac: case snd_soc_dapm_micbias: case snd_soc_dapm_vmid: + case snd_soc_dapm_supply: p->connect = 1; break; /* does effect routing - dynamically connected */ @@ -435,6 +438,9 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget) struct snd_soc_dapm_path *path; int con = 0; + if (widget->id == snd_soc_dapm_supply) + return 0; + if (widget->id == snd_soc_dapm_adc && widget->active) return 1; @@ -471,6 +477,9 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget) struct snd_soc_dapm_path *path; int con = 0; + if (widget->id == snd_soc_dapm_supply) + return 0; + /* active stream ? */ if (widget->id == snd_soc_dapm_dac && widget->active) return 1; @@ -622,6 +631,26 @@ static int dapm_dac_check_power(struct snd_soc_dapm_widget *w) } } +/* Check to see if a power supply is needed */ +static int dapm_supply_check_power(struct snd_soc_dapm_widget *w) +{ + struct snd_soc_dapm_path *path; + int power = 0; + + /* Check if one of our outputs is connected */ + list_for_each_entry(path, &w->sinks, list_source) { + if (path->sink && path->sink->power_check && + path->sink->power_check(path->sink)) { + power = 1; + break; + } + } + + dapm_clear_walk(w->codec); + + return power; +} + /* * Scan a single DAPM widget for a complete audio path and update the * power status appropriately. @@ -752,6 +781,7 @@ static void dbg_dump_dapm(struct snd_soc_codec* codec, const char *action) case snd_soc_dapm_pga: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + case snd_soc_dapm_supply: if (w->name) { in = is_connected_input_ep(w); dapm_clear_walk(w->codec); @@ -880,6 +910,7 @@ static ssize_t dapm_widget_show(struct device *dev, case snd_soc_dapm_pga: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + case snd_soc_dapm_supply: if (w->name) count += sprintf(buf + count, "%s: %s\n", w->name, w->power ? "On":"Off"); @@ -1044,6 +1075,7 @@ static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: + case snd_soc_dapm_supply: list_add(&path->list, &codec->dapm_paths); list_add(&path->list_sink, &wsink->sources); list_add(&path->list_source, &wsource->sinks); @@ -1164,6 +1196,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_line: w->power_check = dapm_generic_check_power; break; + case snd_soc_dapm_supply: + w->power_check = dapm_supply_check_power; case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: -- cgit v1.2.3-70-g09d2 From c1c6b14b22af0f85d05a70405dc3fba5de840c7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 30 Mar 2009 11:32:46 +0200 Subject: rfkill: remove deprecated state constants I only did superficial review, but these constants are stupid to have and without proper warnings nobody will review the code anyway, no amount of shouting will help. Also fix wimax to use correct states. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- arch/arm/mach-pxa/tosa-bt.c | 4 ++-- drivers/net/usb/hso.c | 4 ++-- include/linux/rfkill.h | 8 -------- net/wimax/op-rfkill.c | 8 ++++---- 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c index fb0294bd431..bde42aa2937 100644 --- a/arch/arm/mach-pxa/tosa-bt.c +++ b/arch/arm/mach-pxa/tosa-bt.c @@ -38,9 +38,9 @@ static void tosa_bt_off(struct tosa_bt_data *data) static int tosa_bt_toggle_radio(void *data, enum rfkill_state state) { pr_info("BT_RADIO going: %s\n", - state == RFKILL_STATE_ON ? "on" : "off"); + state == RFKILL_STATE_UNBLOCKED ? "on" : "off"); - if (state == RFKILL_STATE_ON) { + if (state == RFKILL_STATE_UNBLOCKED) { pr_info("TOSA_BT: going ON\n"); tosa_bt_on(data); } else { diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index f84b78d94c4..d696e5fbc17 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2484,7 +2484,7 @@ static int add_net_device(struct hso_device *hso_dev) static int hso_radio_toggle(void *data, enum rfkill_state state) { struct hso_device *hso_dev = data; - int enabled = (state == RFKILL_STATE_ON); + int enabled = (state == RFKILL_STATE_UNBLOCKED); int rv; mutex_lock(&hso_dev->mutex); @@ -2522,7 +2522,7 @@ static void hso_create_rfkill(struct hso_device *hso_dev, snprintf(rfkn, 20, "hso-%d", interface->altsetting->desc.bInterfaceNumber); hso_net->rfkill->name = rfkn; - hso_net->rfkill->state = RFKILL_STATE_ON; + hso_net->rfkill->state = RFKILL_STATE_UNBLOCKED; hso_net->rfkill->data = hso_dev; hso_net->rfkill->toggle_radio = hso_radio_toggle; if (rfkill_register(hso_net->rfkill) < 0) { diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 164332cbb77..e1ec7d9aa49 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -52,14 +52,6 @@ enum rfkill_state { RFKILL_STATE_MAX, /* marker for last valid state */ }; -/* - * These are DEPRECATED, drivers using them should be verified to - * comply with the rfkill usage guidelines in Documentation/rfkill.txt - * and then converted to use the new names for rfkill_state - */ -#define RFKILL_STATE_OFF RFKILL_STATE_SOFT_BLOCKED -#define RFKILL_STATE_ON RFKILL_STATE_UNBLOCKED - /** * struct rfkill - rfkill control structure. * @name: Name of the switch. diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index 2b75aee0421..870032faece 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -113,7 +113,7 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, if (state != wimax_dev->rf_hw) { wimax_dev->rf_hw = state; rfkill_state = state == WIMAX_RF_ON ? - RFKILL_STATE_OFF : RFKILL_STATE_ON; + RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; if (wimax_dev->rf_hw == WIMAX_RF_ON && wimax_dev->rf_sw == WIMAX_RF_ON) wimax_state = WIMAX_ST_READY; @@ -259,10 +259,10 @@ int wimax_rfkill_toggle_radio(void *data, enum rfkill_state state) d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); switch (state) { - case RFKILL_STATE_ON: + case RFKILL_STATE_SOFT_BLOCKED: rf_state = WIMAX_RF_OFF; break; - case RFKILL_STATE_OFF: + case RFKILL_STATE_UNBLOCKED: rf_state = WIMAX_RF_ON; break; default: @@ -361,7 +361,7 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev) wimax_dev->rfkill = rfkill; rfkill->name = wimax_dev->name; - rfkill->state = RFKILL_STATE_OFF; + rfkill->state = RFKILL_STATE_UNBLOCKED; rfkill->data = wimax_dev; rfkill->toggle_radio = wimax_rfkill_toggle_radio; rfkill->user_claim_unsupported = 1; -- cgit v1.2.3-70-g09d2 From 621cac85297de5ba655e3430b007dd2e0da91da6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Mar 2009 14:14:31 +0100 Subject: rfkill: remove user_claim stuff Almost all drivers do not support user_claim, so remove it completely and always report -EOPNOTSUPP to userspace. Since userspace cannot really drive rfkill _anyway_ (due to the odd restrictions imposed by the documentation) having this code is just pointless. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/rfkill.txt | 16 +++++------ drivers/net/wireless/ath9k/main.c | 1 - drivers/net/wireless/b43/rfkill.c | 1 - drivers/net/wireless/b43legacy/rfkill.c | 1 - drivers/net/wireless/iwlwifi/iwl-rfkill.c | 1 - drivers/platform/x86/acer-wmi.c | 1 - drivers/platform/x86/hp-wmi.c | 3 --- drivers/platform/x86/sony-laptop.c | 4 --- drivers/platform/x86/toshiba_acpi.c | 1 - include/linux/rfkill.h | 6 ----- net/rfkill/rfkill.c | 45 +++---------------------------- net/wimax/op-rfkill.c | 1 - 12 files changed, 9 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 4d3ee317a4a..40c3a3f1081 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt @@ -521,16 +521,12 @@ status of the system. Input devices may issue events that are related to rfkill. These are the various KEY_* events and SW_* events supported by rfkill-input.c. -******IMPORTANT****** -When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL -SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it -has set to true the user_claim attribute for that particular switch. This rule -is *absolute*; do NOT violate it. -******IMPORTANT****** - -Userspace must not assume it is the only source of control for rfkill switches. -Their state CAN and WILL change due to firmware actions, direct user actions, -and the rfkill-input EPO override for *_RFKILL_ALL. +Userspace may not change the state of an rfkill switch in response to an +input event, it should refrain from changing states entirely. + +Userspace cannot assume it is the only source of control for rfkill switches. +Their state can change due to firmware actions, direct user actions, and the +rfkill-input EPO override for *_RFKILL_ALL. When rfkill-input is not active, userspace must initiate a rfkill status change by writing to the "state" attribute in order for anything to happen. diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 13d4e6756c9..0607df20e49 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1267,7 +1267,6 @@ static int ath_init_sw_rfkill(struct ath_softc *sc) sc->rf_kill.rfkill->data = sc; sc->rf_kill.rfkill->toggle_radio = ath_sw_toggle_radio; sc->rf_kill.rfkill->state = RFKILL_STATE_UNBLOCKED; - sc->rf_kill.rfkill->user_claim_unsupported = 1; return 0; } diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c index afad4235869..9e1d00bc24d 100644 --- a/drivers/net/wireless/b43/rfkill.c +++ b/drivers/net/wireless/b43/rfkill.c @@ -139,7 +139,6 @@ void b43_rfkill_init(struct b43_wldev *dev) rfk->rfkill->state = RFKILL_STATE_UNBLOCKED; rfk->rfkill->data = dev; rfk->rfkill->toggle_radio = b43_rfkill_soft_toggle; - rfk->rfkill->user_claim_unsupported = 1; rfk->poll_dev = input_allocate_polled_device(); if (!rfk->poll_dev) { diff --git a/drivers/net/wireless/b43legacy/rfkill.c b/drivers/net/wireless/b43legacy/rfkill.c index b32bf6a94f1..4b0c7d27a51 100644 --- a/drivers/net/wireless/b43legacy/rfkill.c +++ b/drivers/net/wireless/b43legacy/rfkill.c @@ -142,7 +142,6 @@ void b43legacy_rfkill_init(struct b43legacy_wldev *dev) rfk->rfkill->state = RFKILL_STATE_UNBLOCKED; rfk->rfkill->data = dev; rfk->rfkill->toggle_radio = b43legacy_rfkill_soft_toggle; - rfk->rfkill->user_claim_unsupported = 1; rfk->poll_dev = input_allocate_polled_device(); if (!rfk->poll_dev) { diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c index 2ad9faf1508..65605ad44e4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c +++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c @@ -91,7 +91,6 @@ int iwl_rfkill_init(struct iwl_priv *priv) priv->rfkill->data = priv; priv->rfkill->state = RFKILL_STATE_UNBLOCKED; priv->rfkill->toggle_radio = iwl_rfkill_soft_rf_kill; - priv->rfkill->user_claim_unsupported = 1; priv->rfkill->dev.class->suspend = NULL; priv->rfkill->dev.class->resume = NULL; diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 0f6e43bf4fc..62d02b3c998 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1005,7 +1005,6 @@ enum rfkill_type type, char *name, u32 cap) *data = cap; rfkill_dev->data = data; rfkill_dev->toggle_radio = acer_rfkill_set; - rfkill_dev->user_claim_unsupported = 1; err = rfkill_register(rfkill_dev); if (err) { diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 50d9019de2b..fe171fad12c 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -434,7 +434,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) wifi_rfkill->name = "hp-wifi"; wifi_rfkill->state = hp_wmi_wifi_state(); wifi_rfkill->toggle_radio = hp_wmi_wifi_set; - wifi_rfkill->user_claim_unsupported = 1; err = rfkill_register(wifi_rfkill); if (err) goto add_sysfs_error; @@ -446,7 +445,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) bluetooth_rfkill->name = "hp-bluetooth"; bluetooth_rfkill->state = hp_wmi_bluetooth_state(); bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set; - bluetooth_rfkill->user_claim_unsupported = 1; err = rfkill_register(bluetooth_rfkill); if (err) goto register_bluetooth_error; @@ -457,7 +455,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) wwan_rfkill->name = "hp-wwan"; wwan_rfkill->state = hp_wmi_wwan_state(); wwan_rfkill->toggle_radio = hp_wmi_wwan_set; - wwan_rfkill->user_claim_unsupported = 1; err = rfkill_register(wwan_rfkill); if (err) goto register_wwan_err; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index d3c92d777bd..184e99e7268 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1097,7 +1097,6 @@ static int sony_nc_setup_wifi_rfkill(struct acpi_device *device) sony_wifi_rfkill->name = "sony-wifi"; sony_wifi_rfkill->toggle_radio = sony_nc_rfkill_set; sony_wifi_rfkill->get_state = sony_nc_rfkill_get; - sony_wifi_rfkill->user_claim_unsupported = 1; sony_wifi_rfkill->data = (void *)SONY_WIFI; err = rfkill_register(sony_wifi_rfkill); if (err) @@ -1119,7 +1118,6 @@ static int sony_nc_setup_bluetooth_rfkill(struct acpi_device *device) sony_bluetooth_rfkill->name = "sony-bluetooth"; sony_bluetooth_rfkill->toggle_radio = sony_nc_rfkill_set; sony_bluetooth_rfkill->get_state = sony_nc_rfkill_get; - sony_bluetooth_rfkill->user_claim_unsupported = 1; sony_bluetooth_rfkill->data = (void *)SONY_BLUETOOTH; err = rfkill_register(sony_bluetooth_rfkill); if (err) @@ -1140,7 +1138,6 @@ static int sony_nc_setup_wwan_rfkill(struct acpi_device *device) sony_wwan_rfkill->name = "sony-wwan"; sony_wwan_rfkill->toggle_radio = sony_nc_rfkill_set; sony_wwan_rfkill->get_state = sony_nc_rfkill_get; - sony_wwan_rfkill->user_claim_unsupported = 1; sony_wwan_rfkill->data = (void *)SONY_WWAN; err = rfkill_register(sony_wwan_rfkill); if (err) @@ -1161,7 +1158,6 @@ static int sony_nc_setup_wimax_rfkill(struct acpi_device *device) sony_wimax_rfkill->name = "sony-wimax"; sony_wimax_rfkill->toggle_radio = sony_nc_rfkill_set; sony_wimax_rfkill->get_state = sony_nc_rfkill_get; - sony_wimax_rfkill->user_claim_unsupported = 1; sony_wimax_rfkill->data = (void *)SONY_WIMAX; err = rfkill_register(sony_wimax_rfkill); if (err) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 9f187265db8..4345089f517 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -803,7 +803,6 @@ static int __init toshiba_acpi_init(void) toshiba_acpi.rfk_dev->name = toshiba_acpi.bt_name; toshiba_acpi.rfk_dev->toggle_radio = bt_rfkill_toggle_radio; - toshiba_acpi.rfk_dev->user_claim_unsupported = 1; toshiba_acpi.rfk_dev->data = &toshiba_acpi; if (hci_get_bt_on(&bt_on) == HCI_SUCCESS && bt_on) { diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e1ec7d9aa49..de18ef227e0 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -58,9 +58,6 @@ enum rfkill_state { * @type: Radio type which the button controls, the value stored * here should be a value from enum rfkill_type. * @state: State of the switch, "UNBLOCKED" means radio can operate. - * @user_claim_unsupported: Whether the hardware supports exclusive - * RF-kill control by userspace. Set this before registering. - * @user_claim: Set when the switch is controlled exlusively by userspace. * @mutex: Guards switch state transitions. It serializes callbacks * and also protects the state. * @data: Pointer to the RF button drivers private data which will be @@ -83,9 +80,6 @@ struct rfkill { const char *name; enum rfkill_type type; - bool user_claim_unsupported; - bool user_claim; - /* the mutex serializes callbacks and also protects * the state */ struct mutex mutex; diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 3eaa39403c1..df1269c5ca7 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -200,7 +200,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, rfkill_global_states[type].current_state = state; list_for_each_entry(rfkill, &rfkill_list, node) { - if ((!rfkill->user_claim) && (rfkill->type == type)) { + if (rfkill->type == type) { mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, state, 0); mutex_unlock(&rfkill->mutex); @@ -447,53 +447,14 @@ static ssize_t rfkill_claim_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct rfkill *rfkill = to_rfkill(dev); - - return sprintf(buf, "%d\n", rfkill->user_claim); + return sprintf(buf, "%d\n", 0); } static ssize_t rfkill_claim_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct rfkill *rfkill = to_rfkill(dev); - unsigned long claim_tmp; - bool claim; - int error; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (rfkill->user_claim_unsupported) - return -EOPNOTSUPP; - - error = strict_strtoul(buf, 0, &claim_tmp); - if (error) - return error; - claim = !!claim_tmp; - - /* - * Take the global lock to make sure the kernel is not in - * the middle of rfkill_switch_all - */ - error = mutex_lock_killable(&rfkill_global_mutex); - if (error) - return error; - - if (rfkill->user_claim != claim) { - if (!claim && !rfkill_epo_lock_active) { - mutex_lock(&rfkill->mutex); - rfkill_toggle_radio(rfkill, - rfkill_global_states[rfkill->type].current_state, - 0); - mutex_unlock(&rfkill->mutex); - } - rfkill->user_claim = claim; - } - - mutex_unlock(&rfkill_global_mutex); - - return error ? error : count; + return -EOPNOTSUPP; } static struct device_attribute rfkill_dev_attrs[] = { diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index 870032faece..a3616e2ccb8 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -364,7 +364,6 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev) rfkill->state = RFKILL_STATE_UNBLOCKED; rfkill->data = wimax_dev; rfkill->toggle_radio = wimax_rfkill_toggle_radio; - rfkill->user_claim_unsupported = 1; /* Initialize the input device for the hw key */ input_dev = input_allocate_device(); -- cgit v1.2.3-70-g09d2 From 07f62d01c1f476a3b328a44faafdfd103a4dedc3 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 27 Mar 2009 22:20:27 +0800 Subject: cfg80211: remove duplicated #include Remove duplicated #include in include/net/cfg80211.h. Signed-off-by: Huang Weiyi Signed-off-by: John W. Linville --- include/net/cfg80211.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5389afdc129..d1b8f0d53c5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10,7 +10,6 @@ #include #include /* remove once we remove the wext stuff */ -#include /* * 802.11 configuration in-kernel interface -- cgit v1.2.3-70-g09d2 From 53b46b8444f600cc1744521ea096ea0c5d494dd0 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 27 Mar 2009 20:53:56 +0200 Subject: nl80211: Generate deauth/disassoc event for locally generated frames Previously, nl80211 mlme events were generated only for received deauthentication and disassociation frames. We need to do the same for locally generated ones in order to let applications know that we disconnected (e.g., when AP does not reply to a probe). Rename the nl80211 and cfg80211 functions (s/rx_//) to make it clearer that they are used for both received and locally generated frames. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/net/cfg80211.h | 16 ++++++++-------- net/mac80211/mlme.c | 8 ++++++-- net/wireless/mlme.c | 13 ++++++------- net/wireless/nl80211.c | 11 +++++------ net/wireless/nl80211.h | 12 ++++++------ 5 files changed, 31 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1b8f0d53c5..ec33096fc65 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -917,28 +917,28 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len); /** - * cfg80211_send_rx_deauth - notification of processed deauthentication + * cfg80211_send_deauth - notification of processed deauthentication * @dev: network device * @buf: deauthentication frame (header + body) * @len: length of the frame data * * This function is called whenever deauthentication has been processed in - * station mode. + * station mode. This includes both received deauthentication frames and + * locally generated ones. */ -void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, - size_t len); +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); /** - * cfg80211_send_rx_disassoc - notification of processed disassociation + * cfg80211_send_disassoc - notification of processed disassociation * @dev: network device * @buf: disassociation response frame (header + body) * @len: length of the frame data * * This function is called whenever disassociation has been processed in - * station mode. + * station mode. This includes both received disassociation frames and locally + * generated ones. */ -void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf, - size_t len); +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_hold_bss - exclude bss from expiration diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 132938b073d..08db02c237c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -325,6 +325,10 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, /* u.deauth.reason_code == u.disassoc.reason_code */ mgmt->u.deauth.reason_code = cpu_to_le16(reason); + if (stype == IEEE80211_STYPE_DEAUTH) + cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len); + else + cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len); ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1187,7 +1191,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, true, false, 0); ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED; - cfg80211_send_rx_deauth(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, len); } @@ -1218,7 +1222,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, } ieee80211_set_disassoc(sdata, false, false, reason_code); - cfg80211_send_rx_disassoc(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, len); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index bec5721b6f9..33ff7cca496 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -28,19 +28,18 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_rx_deauth(rdev, dev, buf, len); + nl80211_send_deauth(rdev, dev, buf, len); } -EXPORT_SYMBOL(cfg80211_send_rx_deauth); +EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf, - size_t len) +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_rx_disassoc(rdev, dev, buf, len); + nl80211_send_disassoc(rdev, dev, buf, len); } -EXPORT_SYMBOL(cfg80211_send_rx_disassoc); +EXPORT_SYMBOL(cfg80211_send_disassoc); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c04df6a6af7..195424eee77 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3415,17 +3415,16 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE); } -void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len) +void nl80211_send_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, size_t len) { nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_DEAUTHENTICATE); } -void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len) +void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len) { nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_DISASSOCIATE); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b77af4ab80b..55686fc264f 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -17,11 +17,11 @@ extern void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, extern void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len); -extern void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len); -extern void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len); +extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); +extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-70-g09d2 From a3b8b0569fbef725597f05278ec58083321f6e9d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 27 Mar 2009 21:59:49 +0200 Subject: nl80211: Add Michael MIC failure event Define a new nl80211 event, NL80211_CMD_MICHAEL_MIC_FAILURE, to be used to notify user space about locally detected Michael MIC failures. This matches with the MLME-MICHAELMICFAILURE.indication() primitive. Since we do not actually have TSC in the skb anymore when mac80211_ev_michael_mic_failure() is called, that function is changed to take in the TSC as an optional parameter instead of as a requirement to include the TSC after the hdr field (which we did not really follow). For now, TSC is not included in the events from mac80211, but it could be added at some point. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 28 ++++++++++++++++++++++++++++ include/net/cfg80211.h | 16 ++++++++++++++++ net/mac80211/event.c | 17 +++++++++-------- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/wpa.c | 2 +- net/wireless/mlme.c | 10 ++++++++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 +++++ 9 files changed, 111 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index cbe8ce3bf48..27f230f063b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -199,6 +199,14 @@ * NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to * MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives). * + * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael + * MIC (part of TKIP) failure; sent on the "mlme" multicast group; the + * event includes %NL80211_ATTR_MAC to describe the source MAC address of + * the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key + * type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and + * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this + * event matches with MLME-MICHAELMICFAILURE.indication() primitive + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -260,6 +268,8 @@ enum nl80211_commands { NL80211_CMD_DEAUTHENTICATE, NL80211_CMD_DISASSOCIATE, + NL80211_CMD_MICHAEL_MIC_FAILURE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -408,6 +418,9 @@ enum nl80211_commands { * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and * %NL80211_CMD_DISASSOCIATE, u16 * + * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as + * a u32 + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -492,6 +505,8 @@ enum nl80211_attrs { NL80211_ATTR_AUTH_TYPE, NL80211_ATTR_REASON_CODE, + NL80211_ATTR_KEY_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1062,4 +1077,17 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, }; + +/** + * enum nl80211_key_type - Key Type + * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key + * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key + * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + */ +enum nl80211_key_type { + NL80211_KEYTYPE_GROUP, + NL80211_KEYTYPE_PAIRWISE, + NL80211_KEYTYPE_PEERKEY, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ec33096fc65..f8bf0c86650 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -957,4 +957,20 @@ void cfg80211_hold_bss(struct cfg80211_bss *bss); */ void cfg80211_unhold_bss(struct cfg80211_bss *bss); +/** + * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) + * @dev: network device + * @addr: The source MAC address of the frame + * @key_type: The key type that the received frame used + * @key_id: Key identifier (0..3) + * @tsc: The TSC value of the frame that generated the MIC failure (6 octets) + * + * This function is called whenever the local MAC detects a MIC failure in a + * received frame. This matches with MLME-MICHAELMICFAILURE.indication() + * primitive. + */ +void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, + enum nl80211_key_type key_type, int key_id, + const u8 *tsc); + #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/event.c b/net/mac80211/event.c index 0d95561c0ee..f288d01a634 100644 --- a/net/mac80211/event.c +++ b/net/mac80211/event.c @@ -12,12 +12,12 @@ #include "ieee80211_i.h" /* - * indicate a failed Michael MIC to userspace; the passed packet - * (in the variable hdr) must be long enough to extract the TKIP - * fields like TSC + * Indicate a failed Michael MIC to userspace. If the caller knows the TSC of + * the frame that generated the MIC failure (i.e., if it was provided by the + * driver or is still in the frame), it should provide that information. */ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr) + struct ieee80211_hdr *hdr, const u8 *tsc) { union iwreq_data wrqu; char *buf = kmalloc(128, GFP_ATOMIC); @@ -34,8 +34,9 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke kfree(buf); } - /* - * TODO: re-add support for sending MIC failure indication - * with all info via nl80211 - */ + cfg80211_michael_mic_failure(sdata->dev, hdr->addr2, + (hdr->addr1[0] & 0x01) ? + NL80211_KEYTYPE_GROUP : + NL80211_KEYTYPE_PAIRWISE, + keyidx, tsc); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e6ed78cb16b..312347d102c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1060,7 +1060,7 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, int rate, int erp, int short_preamble); void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr); + struct ieee80211_hdr *hdr, const u8 *tsc); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int encrypt); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5fa7aedd90e..19c4b4589fe 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1932,7 +1932,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, !ieee80211_is_auth(hdr->frame_control)) goto ignore; - mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr); + mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL); ignore: dev_kfree_skb(rx->skb); rx->skb = NULL; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 4f8bfea278f..dcfae8884b8 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -122,7 +122,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, - (void *) skb->data); + (void *) skb->data, NULL); return RX_DROP_UNUSABLE; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 33ff7cca496..1407244a647 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -43,3 +43,13 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) nl80211_send_disassoc(rdev, dev, buf, len); } EXPORT_SYMBOL(cfg80211_send_disassoc); + +void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, + enum nl80211_key_type key_type, int key_id, + const u8 *tsc) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_michael_mic_failure(rdev, dev, addr, key_type, key_id, tsc); +} +EXPORT_SYMBOL(cfg80211_michael_mic_failure); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 195424eee77..1394115cde9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3430,6 +3430,46 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE); } +void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *addr, + enum nl80211_key_type key_type, int key_id, + const u8 *tsc) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_MICHAEL_MIC_FAILURE); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + if (addr) + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); + NLA_PUT_U32(msg, NL80211_ATTR_KEY_TYPE, key_type); + NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id); + if (tsc) + NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 55686fc264f..e4b92cccd15 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -23,5 +23,10 @@ extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev, extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len); +extern void +nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *addr, + enum nl80211_key_type key_type, + int key_id, const u8 *tsc); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-70-g09d2 From 06aa7afaaa21a4e7f1bcb196bd3f29193924a603 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 26 Mar 2009 23:40:09 +0200 Subject: cfg80211: add cfg80211_inform_bss Added cfg80211_inform_bss() for full-mac devices to use. Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville --- include/net/cfg80211.h | 8 ++++++++ net/wireless/scan.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8bf0c86650..2b1f6c69773 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -863,6 +863,14 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, s32 signal, gfp_t gfp); +struct cfg80211_bss* +cfg80211_inform_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + u64 timestamp, u16 capability, u16 beacon_interval, + const u8 *ie, size_t ielen, + s32 signal, gfp_t gfp); + struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2ae65b39b52..1396248dc5c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -414,6 +414,55 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, return found; } +struct cfg80211_bss* +cfg80211_inform_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + u64 timestamp, u16 capability, u16 beacon_interval, + const u8 *ie, size_t ielen, + s32 signal, gfp_t gfp) +{ + struct cfg80211_internal_bss *res; + size_t privsz; + + if (WARN_ON(!wiphy)) + return NULL; + + privsz = wiphy->bss_priv_size; + + if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && + (signal < 0 || signal > 100))) + return NULL; + + res = kzalloc(sizeof(*res) + privsz + ielen, gfp); + if (!res) + return NULL; + + memcpy(res->pub.bssid, bssid, ETH_ALEN); + res->pub.channel = channel; + res->pub.signal = signal; + res->pub.tsf = timestamp; + res->pub.beacon_interval = beacon_interval; + res->pub.capability = capability; + /* point to after the private area */ + res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.information_elements, ie, ielen); + res->pub.len_information_elements = ielen; + + kref_init(&res->ref); + + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0); + if (!res) + return NULL; + + if (res->pub.capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + + /* cfg80211_bss_update gives us a referenced result */ + return &res->pub; +} +EXPORT_SYMBOL(cfg80211_inform_bss); + struct cfg80211_bss * cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *channel, -- cgit v1.2.3-70-g09d2 From 18a8365992a8041aa178ae9ad5f0d951d0457230 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 Mar 2009 12:12:05 +0200 Subject: cfg80211: introduce scan IE limit attribute This patch introduces a new attribute for a wiphy that tells userspace how long the information elements added to a probe request frame can be at most. It also updates the at76 to advertise that it cannot support that, and, for now until I can fix that, iwlwifi too. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/at76c50x-usb.c | 1 + drivers/net/wireless/iwlwifi/iwl-core.c | 1 + include/linux/nl80211.h | 4 ++++ include/net/wireless.h | 1 + net/mac80211/main.c | 13 ++++++++++++- net/mac80211/util.c | 2 ++ net/wireless/nl80211.c | 7 +++++++ 7 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 69248ded510..55f947ac56d 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2250,6 +2250,7 @@ static int at76_init_new_device(struct at76_priv *priv, /* mac80211 initialisation */ priv->hw->wiphy->max_scan_ssids = 1; + priv->hw->wiphy->max_scan_ie_len = 0; priv->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &at76_supported_band; priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 82abb1f9087..ef55f91374a 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1306,6 +1306,7 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; hw->wiphy->max_scan_ssids = 1; + hw->wiphy->max_scan_ie_len = 0; /* XXX for now */ /* Default value; 4 EDCA QOS priorities */ hw->queues = 4; diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 27f230f063b..209cacee528 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -390,6 +390,8 @@ enum nl80211_commands { * * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with * a single scan request, a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements + * that can be added to a scan request * * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz) * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive @@ -507,6 +509,8 @@ enum nl80211_attrs { NL80211_ATTR_KEY_TYPE, + NL80211_ATTR_MAX_SCAN_IE_LEN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/wireless.h b/include/net/wireless.h index 64a76208580..2bcdeda46d8 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -222,6 +222,7 @@ struct wiphy { int bss_priv_size; u8 max_scan_ssids; + u16 max_scan_ie_len; /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fbcbed6cad0..ee58a787369 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -728,7 +728,18 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, return NULL; wiphy->privid = mac80211_wiphy_privid; - wiphy->max_scan_ssids = 4; + + if (!ops->hw_scan) { + /* For hw_scan, driver needs to set these up. */ + wiphy->max_scan_ssids = 4; + + /* we support a maximum of 32 rates in cfg80211 */ + wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN + - 2 - 32 /* SSID */ + - 4 - 32 /* (ext) supp rates */; + + } + /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ wiphy->bss_priv_size = sizeof(struct ieee80211_bss) - sizeof(struct cfg80211_bss); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fdf432f1455..05caf34f31d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -890,6 +890,8 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, *pos = rate->bitrate / 5; } + /* if adding more here, adjust max_scan_ie_len */ + if (ie) memcpy(skb_put(skb, ie_len), ie, ie_len); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1394115cde9..447fa1790b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -181,6 +181,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); + NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, + dev->wiphy.max_scan_ie_len); nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) @@ -2528,6 +2530,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) else ie_len = 0; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto out; + } + request = kzalloc(sizeof(*request) + sizeof(*ssid) * n_ssids + sizeof(channel) * n_channels -- cgit v1.2.3-70-g09d2 From de95a54b1aebe5592cae971ca5e5d9ec6a381a17 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Apr 2009 11:58:36 +0200 Subject: mac80211: pass all probe request IEs to driver Instead of just passing the cfg80211-requested IEs, pass the locally generated ones as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- include/net/mac80211.h | 13 +++++--- net/mac80211/ieee80211_i.h | 9 ++++-- net/mac80211/main.c | 49 +++++++++++++++++++--------- net/mac80211/scan.c | 24 +++++++++++++- net/mac80211/util.c | 79 ++++++++++++++++++++++++++++------------------ net/wireless/nl80211.c | 3 +- 7 files changed, 123 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b1f6c69773..d303c269a69 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -503,7 +503,7 @@ struct cfg80211_scan_request { int n_ssids; struct ieee80211_channel **channels; u32 n_channels; - u8 *ie; + const u8 *ie; size_t ie_len; /* internal */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3b83a80e3fe..2c6f976831b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1330,11 +1330,14 @@ enum ieee80211_ampdu_mlme_action { * the scan state machine in stack. The scan must honour the channel * configuration done by the regulatory agent in the wiphy's * registered bands. The hardware (or the driver) needs to make sure - * that power save is disabled. When the scan finishes, - * ieee80211_scan_completed() must be called; note that it also must - * be called when the scan cannot finish because the hardware is - * turned off! Anything else is a bug! Returns a negative error code - * which will be seen in userspace. + * that power save is disabled. + * The @req ie/ie_len members are rewritten by mac80211 to contain the + * entire IEs after the SSID, so that drivers need not look at these + * at all but just send them after the SSID -- mac80211 includes the + * (extended) supported rates and HT information (where applicable). + * When the scan finishes, ieee80211_scan_completed() must be called; + * note that it also must be called when the scan cannot finish due to + * any error unless this callback returned a negative error code. * * @sw_scan_start: Notifier function that is called just before a software scan * is started. Can be NULL, if the driver doesn't need this notification. diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73d9f894ed5..cb80a80504e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -671,7 +671,10 @@ struct ieee80211_local { struct cfg80211_scan_request int_scan_req; struct cfg80211_scan_request *scan_req; struct ieee80211_channel *scan_channel; + const u8 *orig_ies; + int orig_ies_len; int scan_channel_idx; + int scan_ies_len; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; unsigned long last_scan_completed; @@ -1090,9 +1093,11 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u8 *extra, size_t extra_len, const u8 *bssid, int encrypt); +int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, + const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len, - u8 *ie, size_t ie_len); + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ee58a787369..b3bbe78821d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -729,22 +729,12 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->privid = mac80211_wiphy_privid; - if (!ops->hw_scan) { - /* For hw_scan, driver needs to set these up. */ - wiphy->max_scan_ssids = 4; - - /* we support a maximum of 32 rates in cfg80211 */ - wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN - - 2 - 32 /* SSID */ - - 4 - 32 /* (ext) supp rates */; - - } - /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ wiphy->bss_priv_size = sizeof(struct ieee80211_bss) - sizeof(struct cfg80211_bss); local = wiphy_priv(wiphy); + local->hw.wiphy = wiphy; local->hw.priv = (char *)local + @@ -831,7 +821,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) enum ieee80211_band band; struct net_device *mdev; struct ieee80211_master_priv *mpriv; - int channels, i, j; + int channels, i, j, max_bitrates; /* * generic code guarantees at least one band, @@ -839,18 +829,23 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * that hw.conf.channel is assigned */ channels = 0; + max_bitrates = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; - if (sband && !local->oper_channel) { + if (!sband) + continue; + if (!local->oper_channel) { /* init channel we're on */ local->hw.conf.channel = local->oper_channel = local->scan_channel = &sband->channels[0]; } - if (sband) - channels += sband->n_channels; + channels += sband->n_channels; + + if (max_bitrates < sband->n_bitrates) + max_bitrates = sband->n_bitrates; } local->int_scan_req.n_channels = channels; @@ -870,6 +865,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + /* + * Calculate scan IE length -- we need this to alloc + * memory and to subtract from the driver limit. It + * includes the (extended) supported rates and HT + * information -- SSID is the driver's responsibility. + */ + local->scan_ies_len = 4 + max_bitrates; /* (ext) supp rates */ + + if (!local->ops->hw_scan) { + /* For hw_scan, driver needs to set these up. */ + local->hw.wiphy->max_scan_ssids = 4; + local->hw.wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; + } + + /* + * If the driver supports any scan IEs, then assume the + * limit includes the IEs mac80211 will add, otherwise + * leave it at zero and let the driver sort it out; we + * still pass our IEs to the driver but userspace will + * not be allowed to in that case. + */ + if (local->hw.wiphy->max_scan_ie_len) + local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 3bf9839f591..4ec1bfc7f6a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -285,6 +285,12 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (WARN_ON(!local->scan_req)) return; + if (local->hw_scanning) { + kfree(local->scan_req->ie); + local->scan_req->ie = local->orig_ies; + local->scan_req->ie_len = local->orig_ies_len; + } + if (local->scan_req != &local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; @@ -457,12 +463,28 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, } if (local->ops->hw_scan) { - int rc; + u8 *ies; + int rc, ielen; + + ies = kmalloc(2 + IEEE80211_MAX_SSID_LEN + + local->scan_ies_len + req->ie_len, GFP_KERNEL); + if (!ies) + return -ENOMEM; + + ielen = ieee80211_build_preq_ies(local, ies, + req->ie, req->ie_len); + local->orig_ies = req->ie; + local->orig_ies_len = req->ie_len; + req->ie = ies; + req->ie_len = ielen; local->hw_scanning = true; rc = local->ops->hw_scan(local_to_hw(local), req); if (rc) { local->hw_scanning = false; + kfree(ies); + req->ie_len = local->orig_ies_len; + req->ie = local->orig_ies; return rc; } local->scan_sdata = scan_sdata; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 05caf34f31d..72b091317a7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -831,16 +831,57 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb, encrypt); } +int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, + const u8 *ie, size_t ie_len) +{ + struct ieee80211_supported_band *sband; + u8 *pos, *supp_rates_len, *esupp_rates_len = NULL; + int i; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + pos = buffer; + + *pos++ = WLAN_EID_SUPP_RATES; + supp_rates_len = pos; + *pos++ = 0; + + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + + if (esupp_rates_len) { + *esupp_rates_len += 1; + } else if (*supp_rates_len == 8) { + *pos++ = WLAN_EID_EXT_SUPP_RATES; + esupp_rates_len = pos; + *pos++ = 1; + } else + *supp_rates_len += 1; + + *pos++ = rate->bitrate / 5; + } + + /* + * If adding more here, adjust code in main.c + * that calculates local->scan_ies_len. + */ + + if (ie) { + memcpy(pos, ie, ie_len); + pos += ie_len; + } + + return pos - buffer; +} + void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len, - u8 *ie, size_t ie_len) + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) { struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *supp_rates, *esupp_rates = NULL; - int i; + u8 *pos; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + ie_len); @@ -867,33 +908,9 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, *pos++ = WLAN_EID_SSID; *pos++ = ssid_len; memcpy(pos, ssid, ssid_len); + pos += ssid_len; - supp_rates = skb_put(skb, 2); - supp_rates[0] = WLAN_EID_SUPP_RATES; - supp_rates[1] = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (esupp_rates) { - pos = skb_put(skb, 1); - esupp_rates[1]++; - } else if (supp_rates[1] == 8) { - esupp_rates = skb_put(skb, 3); - esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; - esupp_rates[1] = 1; - pos = &esupp_rates[2]; - } else { - pos = skb_put(skb, 1); - supp_rates[1]++; - } - *pos = rate->bitrate / 5; - } - - /* if adding more here, adjust max_scan_ie_len */ - - if (ie) - memcpy(skb_put(skb, ie_len), ie, ie_len); + skb_put(skb, ieee80211_build_preq_ies(local, pos, ie, ie_len)); ieee80211_tx_skb(sdata, skb, 0); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 447fa1790b4..68c51022e9d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2597,7 +2597,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_IE]) { request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); - memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), + memcpy((void *)request->ie, + nla_data(info->attrs[NL80211_ATTR_IE]), request->ie_len); } -- cgit v1.2.3-70-g09d2 From 6bad8766620a3c8b64afa981502fdb543e3cfd6c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 2 Apr 2009 14:08:09 -0400 Subject: cfg80211: send regulatory beacon hint events to userspace This informs userspace when a change has occured on a world roaming wiphy's channel which has lifted some restrictions due to a regulatory beacon hint. Because this is now sent to userspace through the regulatory multicast group we remove the debug prints we used to use as they are no longer necessary. Acked-by: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 34 ++++++++++++++++++++++++++++++- net/wireless/nl80211.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 +++++ net/wireless/reg.c | 28 ++++++++++++------------- 4 files changed, 106 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 209cacee528..05ba3539b77 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -7,7 +7,7 @@ * Copyright 2008 Michael Wu * Copyright 2008 Luis Carlos Cobo * Copyright 2008 Michael Buesch - * Copyright 2008 Luis R. Rodriguez + * Copyright 2008, 2009 Luis R. Rodriguez * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * @@ -166,6 +166,22 @@ * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on * to (%NL80211_ATTR_REG_ALPHA2). + * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon + * has been found while world roaming thus enabling active scan or + * any mode of operation that initiates TX (beacons) on a channel + * where we would not have been able to do either before. As an example + * if you are world roaming (regulatory domain set to world or if your + * driver is using a custom world roaming regulatory domain) and while + * doing a passive scan on the 5 GHz band you find an AP there (if not + * on a DFS channel) you will now be able to actively scan for that AP + * or use AP mode on your card on that same channel. Note that this will + * never be used for channels 1-11 on the 2 GHz band as they are always + * enabled world wide. This beacon hint is only sent if your device had + * either disabled active scanning or beaconing on a channel. We send to + * userspace the wiphy on which we removed a restriction from + * (%NL80211_ATTR_WIPHY) and the channel on which this occurred + * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER) + * the beacon hint was processed. * * @NL80211_CMD_AUTHENTICATE: authentication request and notification. * This command is used both as a command (request to authenticate) and @@ -270,6 +286,8 @@ enum nl80211_commands { NL80211_CMD_MICHAEL_MIC_FAILURE, + NL80211_CMD_REG_BEACON_HINT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -288,6 +306,7 @@ enum nl80211_commands { #define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE #define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE +#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT /** * enum nl80211_attrs - nl80211 netlink attributes @@ -423,6 +442,17 @@ enum nl80211_commands { * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as * a u32 * + * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _before_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _after_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -511,6 +541,8 @@ enum nl80211_attrs { NL80211_ATTR_MAX_SCAN_IE_LEN, + NL80211_ATTR_FREQ_BEFORE, + NL80211_ATTR_FREQ_AFTER, /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7285bdc4e59..85b5aa3c76f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3491,6 +3491,60 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_beacon_hint_event(struct wiphy *wiphy, + struct ieee80211_channel *channel_before, + struct ieee80211_channel *channel_after) +{ + struct sk_buff *msg; + void *hdr; + struct nlattr *nl_freq; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_BEACON_HINT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + /* + * Since we are applying the beacon hint to a wiphy we know its + * wiphy_idx is valid + */ + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)); + + /* Before */ + nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); + if (!nl_freq) + goto nla_put_failure; + if (nl80211_msg_put_channel(msg, channel_before)) + goto nla_put_failure; + nla_nest_end(msg, nl_freq); + + /* After */ + nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); + if (!nl_freq) + goto nla_put_failure; + if (nl80211_msg_put_channel(msg, channel_after)) + goto nla_put_failure; + nla_nest_end(msg, nl_freq); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_ATOMIC); + + return; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index e4b92cccd15..b3aaa59afa0 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -29,4 +29,9 @@ nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, enum nl80211_key_type key_type, int key_id, const u8 *tsc); +extern void +nl80211_send_beacon_hint_event(struct wiphy *wiphy, + struct ieee80211_channel *channel_before, + struct ieee80211_channel *channel_after); + #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4af4304cec3..574e217bcc8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1049,18 +1049,10 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { -#ifdef CONFIG_CFG80211_REG_DEBUG -#define REG_DEBUG_BEACON_FLAG(desc) \ - printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \ - "frequency: %d MHz (Ch %d) on %s\n", \ - reg_beacon->chan.center_freq, \ - ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \ - wiphy_name(wiphy)); -#else -#define REG_DEBUG_BEACON_FLAG(desc) do {} while (0) -#endif struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + bool channel_changed = false; + struct ieee80211_channel chan_before; assert_cfg80211_lock(); @@ -1070,20 +1062,28 @@ static void handle_reg_beacon(struct wiphy *wiphy, if (likely(chan->center_freq != reg_beacon->chan.center_freq)) return; + if (chan->beacon_found) + return; + + chan->beacon_found = true; + + chan_before.center_freq = chan->center_freq; + chan_before.flags = chan->flags; + if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; - REG_DEBUG_BEACON_FLAG("active scanning"); + channel_changed = true; } if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; - REG_DEBUG_BEACON_FLAG("beaconing"); + channel_changed = true; } - chan->beacon_found = true; -#undef REG_DEBUG_BEACON_FLAG + if (channel_changed) + nl80211_send_beacon_hint_event(wiphy, &chan_before, chan); } /* -- cgit v1.2.3-70-g09d2 From 25e47c18ac4d8ad09c2ed4b99c1dbbcb7e3d2c51 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Apr 2009 20:14:06 +0200 Subject: cfg80211: add cipher capabilities This adds the necessary code and fields to let drivers specify their cipher capabilities and exports them to userspace. Also update mac80211 to export the ciphers it has. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/wireless.h | 5 +++++ net/mac80211/main.c | 14 ++++++++++++++ net/wireless/nl80211.c | 14 +++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 05ba3539b77..c01423888db 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -453,6 +453,9 @@ enum nl80211_commands { * attributes consists of a nested attribute containing * NL80211_FREQUENCY_ATTR_* * + * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported + * cipher suites + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -540,6 +543,7 @@ enum nl80211_attrs { NL80211_ATTR_KEY_TYPE, NL80211_ATTR_MAX_SCAN_IE_LEN, + NL80211_ATTR_CIPHER_SUITES, NL80211_ATTR_FREQ_BEFORE, NL80211_ATTR_FREQ_AFTER, diff --git a/include/net/wireless.h b/include/net/wireless.h index 2bcdeda46d8..44c2642d3c0 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -205,6 +205,8 @@ struct ieee80211_supported_band { * on the reg_notifier() if it chooses to ignore future * regulatory domain changes caused by other drivers. * @signal_type: signal type reported in &struct cfg80211_bss. + * @cipher_suites: supported cipher suites + * @n_cipher_suites: number of supported cipher suites */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -224,6 +226,9 @@ struct wiphy { u8 max_scan_ssids; u16 max_scan_ie_len; + int n_cipher_suites; + const u32 *cipher_suites; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 679b3a14f11..c1145be72da 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -823,6 +823,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) struct ieee80211_master_priv *mpriv; int channels, i, j, max_bitrates; bool supp_ht; + static const u32 cipher_suites[] = { + WLAN_CIPHER_SUITE_WEP40, + WLAN_CIPHER_SUITE_WEP104, + WLAN_CIPHER_SUITE_TKIP, + WLAN_CIPHER_SUITE_CCMP, + + /* keep last -- depends on hw flags! */ + WLAN_CIPHER_SUITE_AES_CMAC + }; /* * generic code guarantees at least one band, @@ -894,6 +903,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; + local->hw.wiphy->cipher_suites = cipher_suites; + local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); + if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) + local->hw.wiphy->n_cipher_suites--; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85b5aa3c76f..d33cab0e0fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -208,6 +208,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, dev->wiphy.max_scan_ie_len); + NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, + sizeof(u32) * dev->wiphy.n_cipher_suites, + dev->wiphy.cipher_suites); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -979,7 +983,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; - int err; + int err, i; struct net_device *dev; struct key_params params; u8 key_idx = 0; @@ -1048,6 +1052,14 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; + for (i = 0; i < drv->wiphy.n_cipher_suites; i++) + if (params.cipher == drv->wiphy.cipher_suites[i]) + break; + if (i == drv->wiphy.n_cipher_suites) { + err = -EINVAL; + goto out; + } + if (!drv->ops->add_key) { err = -EOPNOTSUPP; goto out; -- cgit v1.2.3-70-g09d2 From 6a362bb1c9f900f7e6daeee52ff2d538badae49b Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 5 Apr 2009 11:01:20 -0400 Subject: ath5k: add support for Fukato Datacask Jupiter LEDs This adds support for the LEDs on the Jupiter netbook. Reported-by: Martin Bammer Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/led.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index 006f7716168..cbdc0b30842 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -67,6 +67,8 @@ static const struct pci_device_id ath5k_led_devices[] = { { ATH_SDEVICE(PCI_VENDOR_ID_AMBIT, 0x0428), ATH_LED(3, 0) }, /* Acer Extensa 5620z (nekoreeve@gmail.com) */ { ATH_SDEVICE(PCI_VENDOR_ID_QMI, 0x0105), ATH_LED(3, 0) }, + /* Fukato Datacask Jupiter 1014a (mrb74@gmx.at) */ + { ATH_SDEVICE(PCI_VENDOR_ID_AZWAVE, 0x1026), ATH_LED(3, 0) }, { } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd57088..ea061e290d0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2313,6 +2313,8 @@ #define PCI_VENDOR_ID_QMI 0x1a32 +#define PCI_VENDOR_ID_AZWAVE 0x1a3b + #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 -- cgit v1.2.3-70-g09d2 From e45d8e534b67580eedd9b4910ccc16d6dd3cceff Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Mon, 6 Apr 2009 15:50:56 -0700 Subject: libertas: add support for Marvell SD8688 chip libertas: add support for Marvell SD8688 chip Use RxPD->pkt_ptr to locate eth803 header in the packet received since SD8688/v10 firmware allows a gap between RxPD and eth803 header. Set SDIO block size to 256 for CMD53. The maximum block size for SD8688 WLAN function is set to 512 in TPLFE_MAX_BLK_SIZE. But using 512 as block size results upto 2K bytes data (4 blocks) being transferred and causes buffer overflow in firmware. Both changes above are backward compatible with earlier firmware versions for SD8385/SD8686. The SDIO_DEVICE_IDs for SD8688 chip are added in include/linux/mmc/sdio_ids.h Signed-off-by: Kiran Divekar Signed-off-by: Bing Zhao Acked-by: Dan Williams Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 4 ++-- drivers/net/wireless/libertas/if_sdio.c | 17 +++++++++++++---- drivers/net/wireless/libertas/if_sdio.h | 2 ++ drivers/net/wireless/libertas/rx.c | 15 ++++++++------- include/linux/mmc/sdio_ids.h | 2 ++ 5 files changed, 27 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 8f279e7bd04..ad99470ae92 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -146,10 +146,10 @@ config LIBERTAS_CS A driver for Marvell Libertas 8385 CompactFlash devices. config LIBERTAS_SDIO - tristate "Marvell Libertas 8385 and 8686 SDIO 802.11b/g cards" + tristate "Marvell Libertas 8385/8686/8688 SDIO 802.11b/g cards" depends on LIBERTAS && MMC ---help--- - A driver for Marvell Libertas 8385 and 8686 SDIO devices. + A driver for Marvell Libertas 8385/8686/8688 SDIO devices. config LIBERTAS_SPI tristate "Marvell Libertas 8686 SPI 802.11b/g cards" diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 76f4c653d64..55864c10f9f 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -48,8 +48,11 @@ static char *lbs_fw_name = NULL; module_param_named(fw_name, lbs_fw_name, charp, 0644); static const struct sdio_device_id if_sdio_ids[] = { - { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_LIBERTAS) }, - { /* end: all zeroes */ }, + { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, + SDIO_DEVICE_ID_MARVELL_LIBERTAS) }, + { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, + SDIO_DEVICE_ID_MARVELL_8688WLAN) }, + { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, if_sdio_ids); @@ -73,6 +76,12 @@ static struct if_sdio_model if_sdio_models[] = { .helper = "sd8686_helper.bin", .firmware = "sd8686.bin", }, + { + /* 8688 */ + .model = 0x10, + .helper = "sd8688_helper.bin", + .firmware = "sd8688.bin", + }, }; struct if_sdio_packet { @@ -488,7 +497,7 @@ static int if_sdio_prog_helper(struct if_sdio_card *card) ret = 0; release: - sdio_set_block_size(card->func, 0); + sdio_set_block_size(card->func, IF_SDIO_BLOCK_SIZE); sdio_release_host(card->func); kfree(chunk_buffer); release_fw: @@ -624,7 +633,7 @@ static int if_sdio_prog_real(struct if_sdio_card *card) ret = 0; release: - sdio_set_block_size(card->func, 0); + sdio_set_block_size(card->func, IF_SDIO_BLOCK_SIZE); sdio_release_host(card->func); kfree(chunk_buffer); release_fw: diff --git a/drivers/net/wireless/libertas/if_sdio.h b/drivers/net/wireless/libertas/if_sdio.h index 533bdfbf5d2..37ada2c29aa 100644 --- a/drivers/net/wireless/libertas/if_sdio.h +++ b/drivers/net/wireless/libertas/if_sdio.h @@ -42,4 +42,6 @@ #define IF_SDIO_EVENT 0x80fc +#define IF_SDIO_BLOCK_SIZE 256 + #endif diff --git a/drivers/net/wireless/libertas/rx.c b/drivers/net/wireless/libertas/rx.c index 820c22d9220..bd845d09f17 100644 --- a/drivers/net/wireless/libertas/rx.c +++ b/drivers/net/wireless/libertas/rx.c @@ -25,7 +25,6 @@ struct rfc1042hdr { } __attribute__ ((packed)); struct rxpackethdr { - struct rxpd rx_pd; struct eth803hdr eth803_hdr; struct rfc1042hdr rfc1042_hdr; } __attribute__ ((packed)); @@ -158,8 +157,9 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb) if (priv->monitormode) return process_rxed_802_11_packet(priv, skb); - p_rx_pkt = (struct rxpackethdr *) skb->data; - p_rx_pd = &p_rx_pkt->rx_pd; + p_rx_pd = (struct rxpd *) skb->data; + p_rx_pkt = (struct rxpackethdr *) ((u8 *)p_rx_pd + + le32_to_cpu(p_rx_pd->pkt_ptr)); if (priv->mesh_dev) { if (priv->mesh_fw_ver == MESH_FW_OLD) { if (p_rx_pd->rx_control & RxPD_MESH_FRAME) @@ -181,8 +181,9 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb) goto done; } - lbs_deb_rx("rx data: skb->len-sizeof(RxPd) = %d-%zd = %zd\n", - skb->len, sizeof(struct rxpd), skb->len - sizeof(struct rxpd)); + lbs_deb_rx("rx data: skb->len - pkt_ptr = %d-%zd = %zd\n", + skb->len, le32_to_cpu(p_rx_pd->pkt_ptr), + skb->len - le32_to_cpu(p_rx_pd->pkt_ptr)); lbs_deb_hex(LBS_DEB_RX, "RX Data: Dest", p_rx_pkt->eth803_hdr.dest_addr, sizeof(p_rx_pkt->eth803_hdr.dest_addr)); @@ -216,14 +217,14 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb) /* Chop off the rxpd + the excess memory from the 802.2/llc/snap header * that was removed */ - hdrchop = (u8 *) p_ethhdr - (u8 *) p_rx_pkt; + hdrchop = (u8 *)p_ethhdr - (u8 *)p_rx_pd; } else { lbs_deb_hex(LBS_DEB_RX, "RX Data: LLC/SNAP", (u8 *) & p_rx_pkt->rfc1042_hdr, sizeof(p_rx_pkt->rfc1042_hdr)); /* Chop off the rxpd */ - hdrchop = (u8 *) & p_rx_pkt->eth803_hdr - (u8 *) p_rx_pkt; + hdrchop = (u8 *)&p_rx_pkt->eth803_hdr - (u8 *)p_rx_pd; } /* Chop off the leading header bytes so the skb points to the start of diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index ea1bf5ba092..c7211ab6dd4 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -25,5 +25,7 @@ #define SDIO_VENDOR_ID_MARVELL 0x02df #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 +#define SDIO_DEVICE_ID_MARVELL_8688WLAN 0x9104 +#define SDIO_DEVICE_ID_MARVELL_8688BT 0x9105 #endif -- cgit v1.2.3-70-g09d2 From f2753ddbadb0873a98421415882318251bbd9eaa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Apr 2009 10:09:24 +0200 Subject: mac80211: add hardware restart function Some hardware defects may require the hardware to be re-initialised completely from scratch. Drivers would need much information (for instance the current MAC address, crypto keys, beaconing information, etc.) stored duplicated from mac80211 to be able to do this, so let mac80211 help them. The new ieee80211_restart_hw() function requires the same code as resuming, so move that code into a new ieee80211_reconfig() function in util.c and leave only the suspend code in pm.c. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 14 ++++++ net/mac80211/ieee80211_i.h | 13 ++++- net/mac80211/main.c | 24 +++++++++ net/mac80211/pm.c | 110 ++---------------------------------------- net/mac80211/util.c | 118 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c6f976831b..a593bedcfed 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1575,6 +1575,20 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw); */ void ieee80211_free_hw(struct ieee80211_hw *hw); +/** + * ieee80211_restart_hw - restart hardware completely + * + * Call this function when the hardware was restarted for some reason + * (hardware error, ...) and the driver is unable to restore its state + * by itself. mac80211 assumes that at this point the driver/hardware + * is completely uninitialised and stopped, it starts the process by + * calling the ->start() operation. The driver will need to reset all + * internal state that it has prior to calling this function. + * + * @hw: the hardware to restart + */ +void ieee80211_restart_hw(struct ieee80211_hw *hw); + /* trick to avoid symbol clashes with the ieee80211 subsystem */ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_rx_status *status); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cb80a80504e..13d6f890ced 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -748,6 +748,8 @@ struct ieee80211_local { int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ + struct work_struct restart_work; + #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *rcdir; @@ -1036,15 +1038,22 @@ void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, u16 capab_info, u8 *pwr_constr_elem, u8 pwr_constr_elem_len); -/* Suspend/resume */ +/* Suspend/resume and hw reconfiguration */ +int ieee80211_reconfig(struct ieee80211_local *local); + #ifdef CONFIG_PM int __ieee80211_suspend(struct ieee80211_hw *hw); -int __ieee80211_resume(struct ieee80211_hw *hw); + +static inline int __ieee80211_resume(struct ieee80211_hw *hw) +{ + return ieee80211_reconfig(hw_to_local(hw)); +} #else static inline int __ieee80211_suspend(struct ieee80211_hw *hw) { return 0; } + static inline int __ieee80211_resume(struct ieee80211_hw *hw) { return 0; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c1145be72da..80c0e28bf54 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -696,6 +696,28 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } EXPORT_SYMBOL(ieee80211_tx_status); +static void ieee80211_restart_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, restart_work); + + rtnl_lock(); + ieee80211_reconfig(local); + rtnl_unlock(); +} + +void ieee80211_restart_hw(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + /* use this reason, __ieee80211_resume will unblock it */ + ieee80211_stop_queues_by_reason(hw, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); + + schedule_work(&local->restart_work); +} +EXPORT_SYMBOL(ieee80211_restart_hw); + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -768,6 +790,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); + INIT_WORK(&local->restart_work, ieee80211_restart_work); + INIT_WORK(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); INIT_WORK(&local->dynamic_ps_disable_work, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 2b4c95cd9da..b38986c9dee 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -72,108 +72,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) return 0; } -int __ieee80211_resume(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_init_conf conf; - struct sta_info *sta; - unsigned long flags; - int res; - - /* restart hardware */ - if (local->open_count) { - res = local->ops->start(hw); - - ieee80211_led_radio(local, hw->conf.radio_enabled); - } - - /* add interfaces */ - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_MONITOR && - netif_running(sdata->dev)) { - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->dev->dev_addr; - res = local->ops->add_interface(hw, &conf); - } - } - - /* add STAs back */ - if (local->ops->sta_notify) { - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry(sta, &local->sta_list, list) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); - - local->ops->sta_notify(hw, &sdata->vif, - STA_NOTIFY_ADD, &sta->sta); - } - spin_unlock_irqrestore(&local->sta_lock, flags); - } - - /* Clear Suspend state so that ADDBA requests can be processed */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { - clear_sta_flags(sta, WLAN_STA_SUSPEND); - } - } - - rcu_read_unlock(); - - /* setup RTS threshold */ - if (local->ops->set_rts_threshold) - local->ops->set_rts_threshold(hw, local->rts_threshold); - - /* reconfigure hardware */ - ieee80211_hw_config(local, ~0); - - netif_addr_lock_bh(local->mdev); - ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); - - /* Finally also reconfigure all the BSS information */ - list_for_each_entry(sdata, &local->interfaces, list) { - u32 changed = ~0; - if (!netif_running(sdata->dev)) - continue; - switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - /* disable beacon change bits */ - changed &= ~IEEE80211_IFCC_BEACON; - /* fall through */ - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_MESH_POINT: - WARN_ON(ieee80211_if_config(sdata, changed)); - ieee80211_bss_info_change_notify(sdata, ~0); - break; - case NL80211_IFTYPE_WDS: - break; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* ignore virtual */ - break; - case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: - WARN_ON(1); - break; - } - } - - /* add back keys */ - list_for_each_entry(sdata, &local->interfaces, list) - if (netif_running(sdata->dev)) - ieee80211_enable_keys(sdata); - - ieee80211_wake_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); - - return 0; -} +/* + * __ieee80211_resume() is a static inline which just calls + * ieee80211_reconfig(), which is also needed for hardware + * hang/firmware failure/etc. recovery. + */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1ff83532120..b361e2acfce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -28,6 +28,7 @@ #include "rate.h" #include "mesh.h" #include "wme.h" +#include "led.h" /* privid for wiphys to determine whether they belong to us or not */ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; @@ -966,3 +967,120 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local, } return supp_rates; } + +int ieee80211_reconfig(struct ieee80211_local *local) +{ + struct ieee80211_hw *hw = &local->hw; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_init_conf conf; + struct sta_info *sta; + unsigned long flags; + int res; + + /* restart hardware */ + if (local->open_count) { + res = local->ops->start(hw); + + ieee80211_led_radio(local, hw->conf.radio_enabled); + } + + /* add interfaces */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_MONITOR && + netif_running(sdata->dev)) { + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = sdata->dev->dev_addr; + res = local->ops->add_interface(hw, &conf); + } + } + + /* add STAs back */ + if (local->ops->sta_notify) { + spin_lock_irqsave(&local->sta_lock, flags); + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + local->ops->sta_notify(hw, &sdata->vif, + STA_NOTIFY_ADD, &sta->sta); + } + spin_unlock_irqrestore(&local->sta_lock, flags); + } + + /* Clear Suspend state so that ADDBA requests can be processed */ + + rcu_read_lock(); + + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + list_for_each_entry_rcu(sta, &local->sta_list, list) { + clear_sta_flags(sta, WLAN_STA_SUSPEND); + } + } + + rcu_read_unlock(); + + /* setup RTS threshold */ + if (local->ops->set_rts_threshold) + local->ops->set_rts_threshold(hw, local->rts_threshold); + + /* reconfigure hardware */ + ieee80211_hw_config(local, ~0); + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + + /* Finally also reconfigure all the BSS information */ + list_for_each_entry(sdata, &local->interfaces, list) { + u32 changed = ~0; + if (!netif_running(sdata->dev)) + continue; + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + /* disable beacon change bits */ + changed &= ~IEEE80211_IFCC_BEACON; + /* fall through */ + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: + /* + * Driver's config_interface can fail if rfkill is + * enabled. Accommodate this return code. + * FIXME: When mac80211 has knowledge of rfkill + * state the code below can change back to: + * WARN(ieee80211_if_config(sdata, changed)); + * ieee80211_bss_info_change_notify(sdata, ~0); + */ + if (ieee80211_if_config(sdata, changed)) + printk(KERN_DEBUG "%s: failed to configure interface during resume\n", + sdata->dev->name); + else + ieee80211_bss_info_change_notify(sdata, ~0); + break; + case NL80211_IFTYPE_WDS: + break; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* ignore virtual */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + WARN_ON(1); + break; + } + } + + /* add back keys */ + list_for_each_entry(sdata, &local->interfaces, list) + if (netif_running(sdata->dev)) + ieee80211_enable_keys(sdata); + + ieee80211_wake_queues_by_reason(hw, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); + + return 0; +} -- cgit v1.2.3-70-g09d2 From 10f644a47b76d3e61b98f2d02ce9690b94c51ee5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 13:17:25 +0200 Subject: mac80211: disable powersave if pm_qos asks for low latency When an application asks for a latency lower than the beacon interval there's nothing we can do -- we need to stay awake and not have the AP buffer frames for us. Add code to automatically calculate this constraint in mac80211 so drivers need not concern themselves with it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 +++++++++ net/mac80211/ieee80211_i.h | 5 ++++- net/mac80211/iface.c | 4 ++-- net/mac80211/main.c | 31 ++++++++++++++++++++++++------- net/mac80211/mlme.c | 36 ++++++++++++++++++++++++++++++++---- net/mac80211/wext.c | 2 +- 6 files changed, 72 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4b501b48ce8..53563d53b5a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1383,4 +1383,13 @@ static inline int ieee80211_freq_to_ofdm_chan(int s_freq, int freq) return -1; } +/** + * ieee80211_tu_to_usec - convert time units (TU) to microseconds + * @tu: the TUs + */ +static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) +{ + return 1024 * tu; +} + #endif /* LINUX_IEEE80211_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ff40dd7b523..b1d18d967d8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -750,6 +750,7 @@ struct ieee80211_local { struct work_struct dynamic_ps_enable_work; struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; + struct notifier_block network_latency_notifier; int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ @@ -938,7 +939,9 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_ps(struct ieee80211_local *local); +void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); +int ieee80211_max_network_latency(struct notifier_block *nb, + unsigned long data, void *dummy); /* IBSS code */ int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6240f76e2a4..5d60deb219d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -317,7 +317,7 @@ static int ieee80211_open(struct net_device *dev) ieee80211_set_wmm_default(sdata); } - ieee80211_recalc_ps(local); + ieee80211_recalc_ps(local, -1); /* * ieee80211_sta_work is disabled while network interface @@ -574,7 +574,7 @@ static int ieee80211_stop(struct net_device *dev) hw_reconf_flags = 0; } - ieee80211_recalc_ps(local); + ieee80211_recalc_ps(local, -1); /* do after stop to avoid reconfiguring when we stop anyway */ if (hw_reconf_flags) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 80c0e28bf54..049ce863980 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -1038,25 +1039,38 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + local->network_latency_notifier.notifier_call = + ieee80211_max_network_latency; + result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + + if (result) { + rtnl_lock(); + goto fail_pm_qos; + } + return 0; -fail_wep: + fail_pm_qos: + ieee80211_led_exit(local); + ieee80211_remove_interfaces(local); + fail_wep: rate_control_deinitialize(local); -fail_rate: + fail_rate: unregister_netdevice(local->mdev); local->mdev = NULL; -fail_dev: + fail_dev: rtnl_unlock(); sta_info_stop(local); -fail_sta_info: + fail_sta_info: debugfs_hw_del(local); destroy_workqueue(local->hw.workqueue); -fail_workqueue: + fail_workqueue: if (local->mdev) free_netdev(local->mdev); -fail_mdev_alloc: + fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); -fail_wiphy_register: + fail_wiphy_register: kfree(local->int_scan_req.channels); return result; } @@ -1069,6 +1083,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); + pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + rtnl_lock(); /* diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 06d9a1d2325..c39a214e7ad 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -515,7 +516,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local) } /* need to hold RTNL or interface lock */ -void ieee80211_recalc_ps(struct ieee80211_local *local) +void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) { struct ieee80211_sub_if_data *sdata, *found = NULL; int count = 0; @@ -534,10 +535,22 @@ void ieee80211_recalc_ps(struct ieee80211_local *local) count++; } - if (count == 1 && found->u.mgd.powersave) - local->ps_sdata = found; - else + if (count == 1 && found->u.mgd.powersave) { + s32 beaconint_us; + + if (latency < 0) + latency = pm_qos_requirement(PM_QOS_NETWORK_LATENCY); + + beaconint_us = ieee80211_tu_to_usec( + found->vif.bss_conf.beacon_int); + + if (beaconint_us > latency) + local->ps_sdata = NULL; + else + local->ps_sdata = found; + } else { local->ps_sdata = NULL; + } ieee80211_change_ps(local); } @@ -2324,3 +2337,18 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) ieee80211_restart_sta_timer(sdata); rcu_read_unlock(); } + +int ieee80211_max_network_latency(struct notifier_block *nb, + unsigned long data, void *dummy) +{ + s32 latency_usec = (s32) data; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, + network_latency_notifier); + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, latency_usec); + mutex_unlock(&local->iflist_mtx); + + return 0; +} diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 81f63e57027..1c4664b8b1a 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -789,7 +789,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); - ieee80211_recalc_ps(local); + ieee80211_recalc_ps(local, -1); return 0; } -- cgit v1.2.3-70-g09d2 From 955394c98c9cb79bdb3e6b479695af3a90ea0623 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 17:04:25 +0200 Subject: mac80211: document powersaving/beacon filter future Document what mac80211 will do in the future to help save power. We're not quite there yet, but a plan helps. Also, while at it, fix the docs wrt. multicast traffic. Signed-off-by: Johannes Berg Reviewed-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 60 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a593bedcfed..52808bdcc6c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1109,11 +1109,9 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * need software support for parsing the TIM bitmap. This is also supported * by mac80211 by combining the %IEEE80211_HW_SUPPORTS_PS and * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still - * required to pass up beacons. Additionally, in this case, mac80211 will - * wake up the hardware when multicast traffic is announced in the beacon. - * - * FIXME: I don't think we can be fast enough in software when we want to - * receive multicast traffic? + * required to pass up beacons. The hardware is still required to handle + * waking up for multicast traffic; if it cannot the driver must handle that + * as best as it can, mac80211 is too slow. * * Dynamic powersave mode is an extension to normal powersave mode in which * the hardware stays awake for a user-specified period of time after sending @@ -1134,11 +1132,53 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * way the host will only receive beacons where some relevant information * (for example ERP protection or WMM settings) have changed. * - * Beacon filter support is informed with %IEEE80211_HW_BEACON_FILTER flag. - * The driver needs to enable beacon filter support whenever power save is - * enabled, that is %IEEE80211_CONF_PS is set. When power save is enabled, - * the stack will not check for beacon miss at all and the driver needs to - * notify about complete loss of beacons with ieee80211_beacon_loss(). + * Beacon filter support is advertised with the %IEEE80211_HW_BEACON_FILTER + * hardware capability. The driver needs to enable beacon filter support + * whenever power save is enabled, that is %IEEE80211_CONF_PS is set. When + * power save is enabled, the stack will not check for beacon loss and the + * driver needs to notify about loss of beacons with ieee80211_beacon_loss(). + * + * The time (or number of beacons missed) until the firmware notifies the + * driver of a beacon loss event (which in turn causes the driver to call + * ieee80211_beacon_loss()) should be configurable and will be controlled + * by mac80211 and the roaming algorithm in the future. + * + * Since there may be constantly changing information elements that nothing + * in the software stack cares about, we will, in the future, have mac80211 + * tell the driver which information elements are interesting in the sense + * that we want to see changes in them. This will include + * - a list of information element IDs + * - a list of OUIs for the vendor information element + * + * Ideally, the hardware would filter out any beacons without changes in the + * requested elements, but if it cannot support that it may, at the expense + * of some efficiency, filter out only a subset. For example, if the device + * doesn't support checking for OUIs it should pass up all changes in all + * vendor information elements. + * + * Note that change, for the sake of simplification, also includes information + * elements appearing or disappearing from the beacon. + * + * Some hardware supports an "ignore list" instead, just make sure nothing + * that was requested is on the ignore list, and include commonly changing + * information element IDs in the ignore list, for example 11 (BSS load) and + * the various vendor-assigned IEs with unknown contents (128, 129, 133-136, + * 149, 150, 155, 156, 173, 176, 178, 179, 219); for forward compatibility + * it could also include some currently unused IDs. + * + * + * In addition to these capabilities, hardware should support notifying the + * host of changes in the beacon RSSI. This is relevant to implement roaming + * when no traffic is flowing (when traffic is flowing we see the RSSI of + * the received data packets). This can consist in notifying the host when + * the RSSI changes significantly or when it drops below or rises above + * configurable thresholds. In the future these thresholds will also be + * configured by mac80211 (which gets them from userspace) to implement + * them as the roaming algorithm requires. + * + * If the hardware cannot implement this, the driver should ask it to + * periodically pass beacon frames to the host so that software can do the + * signal strength threshold checking. */ /** -- cgit v1.2.3-70-g09d2 From 691597cb26f236ac7471f1adf925a134c86799d6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Apr 2009 19:57:45 +0200 Subject: cfg80211/mac80211: move wext SIWMLME into cfg80211 Since we have ->deauth and ->disassoc we can support the wext SIWMLME call directly without driver wext handlers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 3 +++ net/mac80211/cfg.c | 12 ++++-------- net/mac80211/mlme.c | 6 ------ net/mac80211/wext.c | 25 +------------------------ net/wireless/wext-compat.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 53 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d303c269a69..019a41efa0b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -833,6 +833,9 @@ int cfg80211_wext_siwscan(struct net_device *dev, int cfg80211_wext_giwscan(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); +int cfg80211_wext_siwmlme(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra); int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 520efa8a079..daf75287e92 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1268,22 +1268,18 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req) { - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* TODO: req->ie */ + /* TODO: req->ie, req->peer_addr */ return ieee80211_sta_deauthenticate(sdata, req->reason_code); } static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req) { - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* TODO: req->ie */ + /* TODO: req->ie, req->peer_addr */ return ieee80211_sta_disassociate(sdata, req->reason_code); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a16c9d724be..428742d7f44 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2338,9 +2338,6 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", sdata->dev->name, reason); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EINVAL; - ieee80211_set_disassoc(sdata, true, true, reason); return 0; } @@ -2352,9 +2349,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", sdata->dev->name, reason); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EINVAL; - if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED)) return -ENOLINK; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 1c4664b8b1a..896704cf94e 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -612,29 +612,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwmlme(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) -{ - struct ieee80211_sub_if_data *sdata; - struct iw_mlme *mlme = (struct iw_mlme *) extra; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!(sdata->vif.type == NL80211_IFTYPE_STATION)) - return -EINVAL; - - switch (mlme->cmd) { - case IW_MLME_DEAUTH: - /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_deauthenticate(sdata, mlme->reason_code); - case IW_MLME_DISASSOC: - /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_disassociate(sdata, mlme->reason_code); - default: - return -EOPNOTSUPP; - } -} - static int ieee80211_ioctl_siwencode(struct net_device *dev, struct iw_request_info *info, @@ -1076,7 +1053,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWTHRSPY */ (iw_handler) ieee80211_ioctl_siwap, /* SIOCSIWAP */ (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ - (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ + (iw_handler) cfg80211_wext_siwmlme, /* SIOCSIWMLME */ (iw_handler) NULL, /* SIOCGIWAPLIST */ (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0fd1db6e95b..6fd7bf7b448 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -206,7 +207,6 @@ int cfg80211_wext_giwrange(struct net_device *dev, range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; - for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { int i; struct ieee80211_supported_band *sband; @@ -241,3 +241,47 @@ int cfg80211_wext_giwrange(struct net_device *dev, return 0; } EXPORT_SYMBOL(cfg80211_wext_giwrange); + +int cfg80211_wext_siwmlme(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct iw_mlme *mlme = (struct iw_mlme *)extra; + struct cfg80211_registered_device *rdev; + union { + struct cfg80211_disassoc_request disassoc; + struct cfg80211_deauth_request deauth; + } cmd; + + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_dev(wdev->wiphy); + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EINVAL; + + if (mlme->addr.sa_family != ARPHRD_ETHER) + return -EINVAL; + + memset(&cmd, 0, sizeof(cmd)); + + switch (mlme->cmd) { + case IW_MLME_DEAUTH: + if (!rdev->ops->deauth) + return -EOPNOTSUPP; + cmd.deauth.peer_addr = mlme->addr.sa_data; + cmd.deauth.reason_code = mlme->reason_code; + return rdev->ops->deauth(wdev->wiphy, dev, &cmd.deauth); + case IW_MLME_DISASSOC: + if (!rdev->ops->disassoc) + return -EOPNOTSUPP; + cmd.disassoc.peer_addr = mlme->addr.sa_data; + cmd.disassoc.reason_code = mlme->reason_code; + return rdev->ops->disassoc(wdev->wiphy, dev, &cmd.disassoc); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL(cfg80211_wext_siwmlme); -- cgit v1.2.3-70-g09d2 From 04a773ade0680d862b479d7219973df60f7a3834 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Apr 2009 21:24:32 +0200 Subject: cfg80211/nl80211: add IBSS API This adds IBSS API along with (preliminary) wext handlers. The wext handlers can only do IBSS so you need to call them from your own wext handlers if the mode is IBSS. The nl80211 API requires * an SSID * a channel (frequency) for the case that a new IBSS has to be created It optionally supports * a flag to fix the channel * a fixed BSSID The cfg80211 code also takes care to leave the IBSS before the netdev is set down. If wireless extensions are used, it also caches values when the interface is down and instructs the driver to join when the interface is set up. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 18 +++ include/net/cfg80211.h | 72 +++++++++ include/net/wireless.h | 14 ++ net/wireless/Makefile | 2 +- net/wireless/core.c | 16 ++ net/wireless/core.h | 8 + net/wireless/ibss.c | 360 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 182 +++++++++++++++++++++-- net/wireless/nl80211.h | 4 + net/wireless/wext-compat.c | 30 ++++ 10 files changed, 693 insertions(+), 13 deletions(-) create mode 100644 net/wireless/ibss.c (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c01423888db..25ce3e42bd1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -223,6 +223,15 @@ * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this * event matches with MLME-MICHAELMICFAILURE.indication() primitive * + * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a + * FREQ attribute (for the initial frequency if no peer can be found) + * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those + * should be fixed rather than automatically determined. Can only be + * executed on a network interface that is UP, and fixed BSSID/FREQ + * may be rejected. + * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is + * determined by the network interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -288,6 +297,9 @@ enum nl80211_commands { NL80211_CMD_REG_BEACON_HINT, + NL80211_CMD_JOIN_IBSS, + NL80211_CMD_LEAVE_IBSS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -456,6 +468,9 @@ enum nl80211_commands { * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported * cipher suites * + * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look + * for other networks on different channels + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -547,6 +562,9 @@ enum nl80211_attrs { NL80211_ATTR_FREQ_BEFORE, NL80211_ATTR_FREQ_AFTER, + + NL80211_ATTR_FREQ_FIXED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 019a41efa0b..5287a3e56e7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -657,6 +657,31 @@ struct cfg80211_disassoc_request { size_t ie_len; }; +/** + * struct cfg80211_ibss_params - IBSS parameters + * + * This structure defines the IBSS parameters for the join_ibss() + * method. + * + * @ssid: The SSID, will always be non-null. + * @ssid_len: The length of the SSID, will always be non-zero. + * @bssid: Fixed BSSID requested, maybe be %NULL, if set do not + * search for IBSSs with a different BSSID. + * @channel: The channel to use if no IBSS can be found to join. + * @channel_fixed: The channel should be fixed -- do not search for + * IBSSs to join on other channels. + * @ie: information element(s) to include in the beacon + * @ie_len: length of that + */ +struct cfg80211_ibss_params { + u8 *ssid; + u8 *bssid; + struct ieee80211_channel *channel; + u8 *ie; + u8 ssid_len, ie_len; + bool channel_fixed; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -732,6 +757,11 @@ struct cfg80211_disassoc_request { * @assoc: Request to (re)associate with the specified peer * @deauth: Request to deauthenticate from the specified peer * @disassoc: Request to disassociate from the specified peer + * + * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call + * cfg80211_ibss_joined(), also call that function when changing BSSID due + * to a merge. + * @leave_ibss: Leave the IBSS. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -817,6 +847,10 @@ struct cfg80211_ops { struct cfg80211_deauth_request *req); int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req); + + int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_ibss_params *params); + int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); }; /* temporary wext handlers */ @@ -839,6 +873,28 @@ int cfg80211_wext_siwmlme(struct net_device *dev, int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); +int cfg80211_ibss_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_ibss_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_ibss_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_ibss_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_ibss_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_ibss_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); + +/* wext helper for now (to be removed) */ +struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, + struct iw_freq *freq); /** * cfg80211_scan_done - notify that scan finished @@ -984,4 +1040,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc); +/** + * cfg80211_ibss_joined - notify cfg80211 that device joined an IBSS + * + * @dev: network device + * @bssid: the BSSID of the IBSS joined + * @gfp: allocation flags + * + * This function notifies cfg80211 that the device joined an IBSS or + * switched to a different BSSID. Before this function can be called, + * either a beacon has to have been received from the IBSS, or one of + * the cfg80211_inform_bss{,_frame} functions must have been called + * with the locally generated beacon -- this guarantees that there is + * always a scan result for this IBSS. cfg80211 will handle the rest. + */ +void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/wireless.h b/include/net/wireless.h index 44c2642d3c0..abd27b03333 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -265,6 +265,8 @@ struct wiphy { * * @wiphy: pointer to hardware description * @iftype: interface type + * @list: (private) + * @netdev (private) */ struct wireless_dev { struct wiphy *wiphy; @@ -273,6 +275,18 @@ struct wireless_dev { /* private to the generic wireless code */ struct list_head list; struct net_device *netdev; + + /* currently used for IBSS - might be rearranged in the future */ + struct cfg80211_bss *current_bss; + u8 bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + +#ifdef CONFIG_WIRELESS_EXT + /* wext data */ + struct cfg80211_ibss_params wext; + u8 wext_bssid[ETH_ALEN]; +#endif }; /** diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 6d1e7b27b75..14ea01c4a10 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/core.c b/net/wireless/core.c index d1f556535f6..de1ac51ae4e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -450,6 +450,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, dev->ieee80211_ptr->netdev = dev; mutex_unlock(&rdev->devlist_mtx); break; + case NETDEV_GOING_DOWN: + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) + break; + if (!dev->ieee80211_ptr->ssid_len) + break; + cfg80211_leave_ibss(rdev, dev); + break; + case NETDEV_UP: +#ifdef CONFIG_WIRELESS_EXT + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) + break; + if (!dev->ieee80211_ptr->wext.ssid_len) + break; + cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext); + break; +#endif case NETDEV_UNREGISTER: mutex_lock(&rdev->devlist_mtx); if (!list_empty(&dev->ieee80211_ptr->list)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 02668b02e33..2ef3595fd6e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -144,4 +144,12 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev); void cfg80211_bss_age(struct cfg80211_registered_device *dev, unsigned long age_secs); +/* IBSS */ +int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params); +void cfg80211_clear_ibss(struct net_device *dev); +int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c new file mode 100644 index 00000000000..2bf42fdef3a --- /dev/null +++ b/net/wireless/ibss.c @@ -0,0 +1,360 @@ +/* + * Some IBSS support code for cfg80211. + * + * Copyright 2009 Johannes Berg + */ + +#include +#include +#include +#include +#include "nl80211.h" + + +void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return; + + if (WARN_ON(!wdev->ssid_len)) + return; + + if (memcmp(bssid, wdev->bssid, ETH_ALEN) == 0) + return; + + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS); + + if (WARN_ON(!bss)) + return; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + } + + cfg80211_hold_bss(bss); + wdev->current_bss = bss; + memcpy(wdev->bssid, bssid, ETH_ALEN); + + nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, gfp); +#ifdef CONFIG_WIRELESS_EXT + memset(&wrqu, 0, sizeof(wrqu)); + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} +EXPORT_SYMBOL(cfg80211_ibss_joined); + +int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + if (wdev->ssid_len) + return -EALREADY; + +#ifdef CONFIG_WIRELESS_EXT + wdev->wext.channel = params->channel; +#endif + err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); + + if (err) + return err; + + memcpy(wdev->ssid, params->ssid, params->ssid_len); + wdev->ssid_len = params->ssid_len; + + return 0; +} + +void cfg80211_clear_ibss(struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + } + + wdev->current_bss = NULL; + wdev->ssid_len = 0; + memset(wdev->bssid, 0, ETH_ALEN); +} + +int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + int err; + + err = rdev->ops->leave_ibss(&rdev->wiphy, dev); + + if (err) + return err; + + cfg80211_clear_ibss(dev); + + return 0; +} + +#ifdef CONFIG_WIRELESS_EXT +static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + enum ieee80211_band band; + int i; + + /* try to find an IBSS channel if none requested ... */ + if (!wdev->wext.channel) { + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + + sband = rdev->wiphy.bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + chan = &sband->channels[i]; + if (chan->flags & IEEE80211_CHAN_NO_IBSS) + continue; + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + wdev->wext.channel = chan; + break; + } + + if (wdev->wext.channel) + break; + } + + if (!wdev->wext.channel) + return -EINVAL; + } + + /* don't join -- SSID is not there */ + if (!wdev->wext.ssid_len) + return 0; + + if (!netif_running(wdev->netdev)) + return 0; + + return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy), + wdev->netdev, &wdev->wext); +} + +int cfg80211_ibss_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_channel *chan; + int err; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss) + return -EOPNOTSUPP; + + chan = cfg80211_wext_freq(wdev->wiphy, freq); + if (chan && IS_ERR(chan)) + return PTR_ERR(chan); + + if (chan && + (chan->flags & IEEE80211_CHAN_NO_IBSS || + chan->flags & IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + if (wdev->wext.channel == chan) + return 0; + + if (wdev->ssid_len) { + err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev); + if (err) + return err; + } + + if (chan) { + wdev->wext.channel = chan; + wdev->wext.channel_fixed = true; + } else { + /* cfg80211_ibss_wext_join will pick one if needed */ + wdev->wext.channel_fixed = false; + } + + return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwfreq); + +int cfg80211_ibss_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_channel *chan = NULL; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + if (wdev->current_bss) + chan = wdev->current_bss->channel; + else if (wdev->wext.channel) + chan = wdev->wext.channel; + + if (chan) { + freq->m = chan->center_freq; + freq->e = 6; + return 0; + } + + /* no channel if not joining */ + return -EINVAL; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwfreq); + +int cfg80211_ibss_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + size_t len = data->length; + int err; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss) + return -EOPNOTSUPP; + + if (wdev->ssid_len) { + err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev); + if (err) + return err; + } + + /* iwconfig uses nul termination in SSID.. */ + if (len > 0 && ssid[len - 1] == '\0') + len--; + + wdev->wext.ssid = wdev->ssid; + memcpy(wdev->wext.ssid, ssid, len); + wdev->wext.ssid_len = len; + + return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwessid); + +int cfg80211_ibss_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + data->flags = 0; + + if (wdev->ssid_len) { + data->flags = 1; + data->length = wdev->ssid_len; + memcpy(ssid, wdev->ssid, data->length); + } else if (wdev->wext.ssid) { + data->flags = 1; + data->length = wdev->wext.ssid_len; + memcpy(ssid, wdev->wext.ssid, data->length); + } + + return 0; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwessid); + +int cfg80211_ibss_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 *bssid = ap_addr->sa_data; + int err; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss) + return -EOPNOTSUPP; + + if (ap_addr->sa_family != ARPHRD_ETHER) + return -EINVAL; + + /* automatic mode */ + if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) + bssid = NULL; + + /* both automatic */ + if (!bssid && !wdev->wext.bssid) + return 0; + + /* fixed already - and no change */ + if (wdev->wext.bssid && bssid && + compare_ether_addr(bssid, wdev->wext.bssid) == 0) + return 0; + + if (wdev->ssid_len) { + err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev); + if (err) + return err; + } + + if (bssid) { + memcpy(wdev->wext_bssid, bssid, ETH_ALEN); + wdev->wext.bssid = wdev->wext_bssid; + } else + wdev->wext.bssid = NULL; + + return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwap); + +int cfg80211_ibss_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* call only for ibss! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) + return -EINVAL; + + ap_addr->sa_family = ARPHRD_ETHER; + + if (wdev->wext.bssid) { + memcpy(ap_addr->sa_data, wdev->wext.bssid, ETH_ALEN); + return 0; + } + + memcpy(ap_addr->sa_data, wdev->bssid, ETH_ALEN); + return 0; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwap); +#endif diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d2cfde659e7..16f86356ac9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -116,6 +116,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = IEEE80211_MAX_SSID_LEN }, [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, + [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, }; /* IE validation */ @@ -322,6 +323,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); #undef CMD nla_nest_end(msg, nl_cmds); @@ -668,7 +670,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *drv; struct vif_params params; int err, ifindex; - enum nl80211_iftype type; + enum nl80211_iftype otype, ntype; struct net_device *dev; u32 _flags, *flags = NULL; bool change = false; @@ -682,30 +684,27 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) goto unlock_rtnl; ifindex = dev->ifindex; - type = dev->ieee80211_ptr->iftype; + otype = ntype = dev->ieee80211_ptr->iftype; dev_put(dev); if (info->attrs[NL80211_ATTR_IFTYPE]) { - enum nl80211_iftype ntype; - ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); - if (type != ntype) + if (otype != ntype) change = true; - type = ntype; - if (type > NL80211_IFTYPE_MAX) { + if (ntype > NL80211_IFTYPE_MAX) { err = -EINVAL; goto unlock; } } if (!drv->ops->change_virtual_intf || - !(drv->wiphy.interface_modes & (1 << type))) { + !(drv->wiphy.interface_modes & (1 << ntype))) { err = -EOPNOTSUPP; goto unlock; } if (info->attrs[NL80211_ATTR_MESH_ID]) { - if (type != NL80211_IFTYPE_MESH_POINT) { + if (ntype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto unlock; } @@ -715,7 +714,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { - if (type != NL80211_IFTYPE_MONITOR) { + if (ntype != NL80211_IFTYPE_MONITOR) { err = -EINVAL; goto unlock; } @@ -730,12 +729,17 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (change) err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, flags, ¶ms); + ntype, flags, ¶ms); else err = 0; dev = __dev_get_by_index(&init_net, ifindex); - WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); + WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != ntype)); + + if (dev && !err && (ntype != otype)) { + if (otype == NL80211_IFTYPE_ADHOC) + cfg80211_clear_ibss(dev); + } unlock: cfg80211_put_dev(drv); @@ -3052,6 +3056,114 @@ unlock_rtnl: return err; } +static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_ibss_params ibss; + struct wiphy *wiphy; + int err; + + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || + !info->attrs[NL80211_ATTR_SSID] || + !nla_len(info->attrs[NL80211_ATTR_SSID])) + return -EINVAL; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->join_ibss) { + err = -EOPNOTSUPP; + goto out; + } + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + wiphy = &drv->wiphy; + memset(&ibss, 0, sizeof(ibss)); + + if (info->attrs[NL80211_ATTR_MAC]) + ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + ibss.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + ibss.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + + if (info->attrs[NL80211_ATTR_IE]) { + ibss.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + ibss.channel = ieee80211_get_channel(wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!ibss.channel || + ibss.channel->flags & IEEE80211_CHAN_NO_IBSS || + ibss.channel->flags & IEEE80211_CHAN_DISABLED) { + err = -EINVAL; + goto out; + } + + ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; + + err = cfg80211_join_ibss(drv, dev, &ibss); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + int err; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->leave_ibss) { + err = -EOPNOTSUPP; + goto out; + } + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + err = cfg80211_leave_ibss(drv, dev); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -3253,6 +3365,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_JOIN_IBSS, + .doit = nl80211_join_ibss, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_LEAVE_IBSS, + .doit = nl80211_leave_ibss, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -3466,6 +3590,40 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE); } +void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_JOIN_IBSS); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b3aaa59afa0..17d2d8bfaf7 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -34,4 +34,8 @@ nl80211_send_beacon_hint_event(struct wiphy *wiphy, struct ieee80211_channel *channel_before, struct ieee80211_channel *channel_after); +void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 6fd7bf7b448..57eaea26b67 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -285,3 +285,33 @@ int cfg80211_wext_siwmlme(struct net_device *dev, } } EXPORT_SYMBOL(cfg80211_wext_siwmlme); + + +/** + * cfg80211_wext_freq - get wext frequency for non-"auto" + * @wiphy: the wiphy + * @freq: the wext freq encoding + * + * Returns a channel, %NULL for auto, or an ERR_PTR for errors! + */ +struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, + struct iw_freq *freq) +{ + if (freq->e == 0) { + if (freq->m < 0) + return NULL; + else + return ieee80211_get_channel(wiphy, + ieee80211_channel_to_frequency(freq->m)); + } else { + int i, div = 1000000; + for (i = 0; i < freq->e; i++) + div /= 10; + if (div > 0) + return ieee80211_get_channel(wiphy, freq->m / div); + else + return ERR_PTR(-EINVAL); + } + +} +EXPORT_SYMBOL(cfg80211_wext_freq); -- cgit v1.2.3-70-g09d2 From d323655372590c533c275b1d798f9d1221efb5c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Apr 2009 14:31:42 +0200 Subject: cfg80211: clean up includes Trying to separate header files into net/wireless.h and net/cfg80211.h has been a source of confusion. Remove net/wireless.h (because there also is the linux/wireless.h) and subsume everything into net/cfg80211.h -- except the definitions for regulatory structures which get moved to a new header net/regulatory.h. The "new" net/cfg80211.h is now divided into sections. There are no real changes in this patch but code shuffling and some very minor documentation fixes. I have also, to make things reflect reality, put in a copyright line for Luis to net/regulatory.h since that is probably exclusively written by him but was formerly in a file that only had my copyright line. Signed-off-by: Johannes Berg Cc: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ar9170/ar9170.h | 2 +- drivers/net/wireless/ath/ar9170/usb.h | 2 +- drivers/net/wireless/ath/ath9k/eeprom.h | 2 +- drivers/net/wireless/ath/regd.c | 1 - drivers/net/wireless/ath/regd.h | 1 - drivers/net/wireless/rndis_wlan.c | 1 - include/net/cfg80211.h | 709 +++++++++++++++++++++++++------ include/net/mac80211.h | 1 - include/net/regulatory.h | 101 +++++ include/net/wireless.h | 492 --------------------- net/mac80211/ht.c | 1 - net/mac80211/ieee80211_i.h | 1 - net/mac80211/spectmgmt.c | 2 +- net/wireless/core.c | 1 - net/wireless/core.h | 1 - net/wireless/ibss.c | 1 - net/wireless/reg.c | 1 - net/wireless/util.c | 6 +- net/wireless/wext-compat.c | 1 - 19 files changed, 690 insertions(+), 637 deletions(-) create mode 100644 include/net/regulatory.h delete mode 100644 include/net/wireless.h (limited to 'include') diff --git a/drivers/net/wireless/ath/ar9170/ar9170.h b/drivers/net/wireless/ath/ar9170/ar9170.h index f797495faa5..2522a190fdf 100644 --- a/drivers/net/wireless/ath/ar9170/ar9170.h +++ b/drivers/net/wireless/ath/ar9170/ar9170.h @@ -40,7 +40,7 @@ #include #include -#include +#include #include #ifdef CONFIG_AR9170_LEDS #include diff --git a/drivers/net/wireless/ath/ar9170/usb.h b/drivers/net/wireless/ath/ar9170/usb.h index f5852924cd6..ac42586495d 100644 --- a/drivers/net/wireless/ath/ar9170/usb.h +++ b/drivers/net/wireless/ath/ar9170/usb.h @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include "eeprom.h" diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h index 9a7715df5cf..7c59dc47f91 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.h +++ b/drivers/net/wireless/ath/ath9k/eeprom.h @@ -17,7 +17,7 @@ #ifndef EEPROM_H #define EEPROM_H -#include +#include #define AH_USE_EEPROM 0x1 diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index 526c7f1308d..fdf07c82208 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "regd.h" #include "regd_common.h" diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h index eba7c7123b5..07291ccb23f 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -20,7 +20,6 @@ #include #include -#include #define NO_CTL 0xff #define SD_NO_CTL 0xE0 diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 239e6a14eae..43e0ba61df2 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5287a3e56e7..601eac64b02 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1,70 +1,217 @@ #ifndef __NET_CFG80211_H #define __NET_CFG80211_H +/* + * 802.11 device and configuration interface + * + * Copyright 2006-2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include #include #include #include #include #include -#include -#include -#include +#include + /* remove once we remove the wext stuff */ +#include +#include + /* - * 802.11 configuration in-kernel interface + * wireless hardware capability structures + */ + +/** + * enum ieee80211_band - supported frequency bands + * + * The bands are assigned this way because the supported + * bitrates differ in these bands. * - * Copyright 2006, 2007 Johannes Berg + * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band + * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) */ +enum ieee80211_band { + IEEE80211_BAND_2GHZ, + IEEE80211_BAND_5GHZ, + + /* keep last */ + IEEE80211_NUM_BANDS +}; /** - * struct vif_params - describes virtual interface parameters - * @mesh_id: mesh ID to use - * @mesh_id_len: length of the mesh ID + * enum ieee80211_channel_flags - channel flags + * + * Channel flags set by the regulatory control code. + * + * @IEEE80211_CHAN_DISABLED: This channel is disabled. + * @IEEE80211_CHAN_PASSIVE_SCAN: Only passive scanning is permitted + * on this channel. + * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel. + * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. + * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel + * is not permitted. + * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel + * is not permitted. */ -struct vif_params { - u8 *mesh_id; - int mesh_id_len; +enum ieee80211_channel_flags { + IEEE80211_CHAN_DISABLED = 1<<0, + IEEE80211_CHAN_PASSIVE_SCAN = 1<<1, + IEEE80211_CHAN_NO_IBSS = 1<<2, + IEEE80211_CHAN_RADAR = 1<<3, + IEEE80211_CHAN_NO_FAT_ABOVE = 1<<4, + IEEE80211_CHAN_NO_FAT_BELOW = 1<<5, }; -/* Radiotap header iteration - * implemented in net/wireless/radiotap.c - * docs in Documentation/networking/radiotap-headers.txt +/** + * struct ieee80211_channel - channel definition + * + * This structure describes a single channel for use + * with cfg80211. + * + * @center_freq: center frequency in MHz + * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz + * @hw_value: hardware-specific value for the channel + * @flags: channel flags from &enum ieee80211_channel_flags. + * @orig_flags: channel flags at registration time, used by regulatory + * code to support devices with additional restrictions + * @band: band this channel belongs to. + * @max_antenna_gain: maximum antenna gain in dBi + * @max_power: maximum transmission power (in dBm) + * @beacon_found: helper to regulatory code to indicate when a beacon + * has been found on this channel. Use regulatory_hint_found_beacon() + * to enable this, this is is useful only on 5 GHz band. + * @orig_mag: internal use + * @orig_mpwr: internal use */ +struct ieee80211_channel { + enum ieee80211_band band; + u16 center_freq; + u8 max_bandwidth; + u16 hw_value; + u32 flags; + int max_antenna_gain; + int max_power; + bool beacon_found; + u32 orig_flags; + int orig_mag, orig_mpwr; +}; + /** - * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args - * @rtheader: pointer to the radiotap header we are walking through - * @max_length: length of radiotap header in cpu byte ordering - * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg - * @this_arg: pointer to current radiotap arg - * @arg_index: internal next argument index - * @arg: internal next argument pointer - * @next_bitmap: internal pointer to next present u32 - * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + * enum ieee80211_rate_flags - rate flags + * + * Hardware/specification flags for rates. These are structured + * in a way that allows using the same bitrate structure for + * different bands/PHY modes. + * + * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short + * preamble on this bitrate; only relevant in 2.4GHz band and + * with CCK rates. + * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate + * when used with 802.11a (on the 5 GHz band); filled by the + * core code when registering the wiphy. + * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate + * when used with 802.11b (on the 2.4 GHz band); filled by the + * core code when registering the wiphy. + * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate + * when used with 802.11g (on the 2.4 GHz band); filled by the + * core code when registering the wiphy. + * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode. */ +enum ieee80211_rate_flags { + IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, + IEEE80211_RATE_MANDATORY_A = 1<<1, + IEEE80211_RATE_MANDATORY_B = 1<<2, + IEEE80211_RATE_MANDATORY_G = 1<<3, + IEEE80211_RATE_ERP_G = 1<<4, +}; -struct ieee80211_radiotap_iterator { - struct ieee80211_radiotap_header *rtheader; - int max_length; - int this_arg_index; - u8 *this_arg; +/** + * struct ieee80211_rate - bitrate definition + * + * This structure describes a bitrate that an 802.11 PHY can + * operate with. The two values @hw_value and @hw_value_short + * are only for driver use when pointers to this structure are + * passed around. + * + * @flags: rate-specific flags + * @bitrate: bitrate in units of 100 Kbps + * @hw_value: driver/hardware value for this rate + * @hw_value_short: driver/hardware value for this rate when + * short preamble is used + */ +struct ieee80211_rate { + u32 flags; + u16 bitrate; + u16 hw_value, hw_value_short; +}; - int arg_index; - u8 *arg; - __le32 *next_bitmap; - u32 bitmap_shifter; +/** + * struct ieee80211_sta_ht_cap - STA's HT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11n HT capabilities for an STA. + * + * @ht_supported: is HT supported by the STA + * @cap: HT capabilities map as described in 802.11n spec + * @ampdu_factor: Maximum A-MPDU length factor + * @ampdu_density: Minimum A-MPDU spacing + * @mcs: Supported MCS rates + */ +struct ieee80211_sta_ht_cap { + u16 cap; /* use IEEE80211_HT_CAP_ */ + bool ht_supported; + u8 ampdu_factor; + u8 ampdu_density; + struct ieee80211_mcs_info mcs; }; -extern int ieee80211_radiotap_iterator_init( - struct ieee80211_radiotap_iterator *iterator, - struct ieee80211_radiotap_header *radiotap_header, - int max_length); +/** + * struct ieee80211_supported_band - frequency band definition + * + * This structure describes a frequency band a wiphy + * is able to operate in. + * + * @channels: Array of channels the hardware can operate in + * in this band. + * @band: the band this structure represents + * @n_channels: Number of channels in @channels + * @bitrates: Array of bitrates the hardware can operate with + * in this band. Must be sorted to give a valid "supported + * rates" IE, i.e. CCK rates first, then OFDM. + * @n_bitrates: Number of bitrates in @bitrates + */ +struct ieee80211_supported_band { + struct ieee80211_channel *channels; + struct ieee80211_rate *bitrates; + enum ieee80211_band band; + int n_channels; + int n_bitrates; + struct ieee80211_sta_ht_cap ht_cap; +}; -extern int ieee80211_radiotap_iterator_next( - struct ieee80211_radiotap_iterator *iterator); +/* + * Wireless hardware/device configuration structures and methods + */ +/** + * struct vif_params - describes virtual interface parameters + * @mesh_id: mesh ID to use + * @mesh_id_len: length of the mesh ID + */ +struct vif_params { + u8 *mesh_id; + int mesh_id_len; +}; - /** +/** * struct key_params - key information * * Information about a key @@ -347,92 +494,6 @@ struct bss_parameters { u8 basic_rates_len; }; -/** - * enum environment_cap - Environment parsed from country IE - * @ENVIRON_ANY: indicates country IE applies to both indoor and - * outdoor operation. - * @ENVIRON_INDOOR: indicates country IE applies only to indoor operation - * @ENVIRON_OUTDOOR: indicates country IE applies only to outdoor operation - */ -enum environment_cap { - ENVIRON_ANY, - ENVIRON_INDOOR, - ENVIRON_OUTDOOR, -}; - -/** - * struct regulatory_request - used to keep track of regulatory requests - * - * @wiphy_idx: this is set if this request's initiator is - * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This - * can be used by the wireless core to deal with conflicts - * and potentially inform users of which devices specifically - * cased the conflicts. - * @initiator: indicates who sent this request, could be any of - * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) - * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested - * regulatory domain. We have a few special codes: - * 00 - World regulatory domain - * 99 - built by driver but a specific alpha2 cannot be determined - * 98 - result of an intersection between two regulatory domains - * @intersect: indicates whether the wireless core should intersect - * the requested regulatory domain with the presently set regulatory - * domain. - * @country_ie_checksum: checksum of the last processed and accepted - * country IE - * @country_ie_env: lets us know if the AP is telling us we are outdoor, - * indoor, or if it doesn't matter - * @list: used to insert into the reg_requests_list linked list - */ -struct regulatory_request { - int wiphy_idx; - enum nl80211_reg_initiator initiator; - char alpha2[2]; - bool intersect; - u32 country_ie_checksum; - enum environment_cap country_ie_env; - struct list_head list; -}; - -struct ieee80211_freq_range { - u32 start_freq_khz; - u32 end_freq_khz; - u32 max_bandwidth_khz; -}; - -struct ieee80211_power_rule { - u32 max_antenna_gain; - u32 max_eirp; -}; - -struct ieee80211_reg_rule { - struct ieee80211_freq_range freq_range; - struct ieee80211_power_rule power_rule; - u32 flags; -}; - -struct ieee80211_regdomain { - u32 n_reg_rules; - char alpha2[2]; - struct ieee80211_reg_rule reg_rules[]; -}; - -#define MHZ_TO_KHZ(freq) ((freq) * 1000) -#define KHZ_TO_MHZ(freq) ((freq) / 1000) -#define DBI_TO_MBI(gain) ((gain) * 100) -#define MBI_TO_DBI(gain) ((gain) / 100) -#define DBM_TO_MBM(gain) ((gain) * 100) -#define MBM_TO_DBM(gain) ((gain) / 100) - -#define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \ - .freq_range.start_freq_khz = MHZ_TO_KHZ(start), \ - .freq_range.end_freq_khz = MHZ_TO_KHZ(end), \ - .freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \ - .power_rule.max_antenna_gain = DBI_TO_MBI(gain), \ - .power_rule.max_eirp = DBM_TO_MBM(eirp), \ - .flags = reg_flags, \ - } - struct mesh_config { /* Timeouts in ms */ /* Mesh plink management parameters */ @@ -853,7 +914,396 @@ struct cfg80211_ops { int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); }; -/* temporary wext handlers */ +/* + * wireless hardware and networking interfaces structures + * and registration/helper functions + */ + +/** + * struct wiphy - wireless hardware description + * @idx: the wiphy index assigned to this item + * @class_dev: the class device representing /sys/class/ieee80211/ + * @custom_regulatory: tells us the driver for this device + * has its own custom regulatory domain and cannot identify the + * ISO / IEC 3166 alpha2 it belongs to. When this is enabled + * we will disregard the first regulatory hint (when the + * initiator is %REGDOM_SET_BY_CORE). + * @strict_regulatory: tells us the driver for this device will ignore + * regulatory domain settings until it gets its own regulatory domain + * via its regulatory_hint(). After its gets its own regulatory domain + * it will only allow further regulatory domain settings to further + * enhance compliance. For example if channel 13 and 14 are disabled + * by this regulatory domain no user regulatory domain can enable these + * channels at a later time. This can be used for devices which do not + * have calibration information gauranteed for frequencies or settings + * outside of its regulatory domain. + * @reg_notifier: the driver's regulatory notification callback + * @regd: the driver's regulatory domain, if one was requested via + * the regulatory_hint() API. This can be used by the driver + * on the reg_notifier() if it chooses to ignore future + * regulatory domain changes caused by other drivers. + * @signal_type: signal type reported in &struct cfg80211_bss. + * @cipher_suites: supported cipher suites + * @n_cipher_suites: number of supported cipher suites + */ +struct wiphy { + /* assign these fields before you register the wiphy */ + + /* permanent MAC address */ + u8 perm_addr[ETH_ALEN]; + + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ + u16 interface_modes; + + bool custom_regulatory; + bool strict_regulatory; + + enum cfg80211_signal_type signal_type; + + int bss_priv_size; + u8 max_scan_ssids; + u16 max_scan_ie_len; + + int n_cipher_suites; + const u32 *cipher_suites; + + /* If multiple wiphys are registered and you're handed e.g. + * a regular netdev with assigned ieee80211_ptr, you won't + * know whether it points to a wiphy your driver has registered + * or not. Assign this to something global to your driver to + * help determine whether you own this wiphy or not. */ + void *privid; + + struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + + /* Lets us get back the wiphy on the callback */ + int (*reg_notifier)(struct wiphy *wiphy, + struct regulatory_request *request); + + /* fields below are read-only, assigned by cfg80211 */ + + const struct ieee80211_regdomain *regd; + + /* the item in /sys/class/ieee80211/ points to this, + * you need use set_wiphy_dev() (see below) */ + struct device dev; + + /* dir in debugfs: ieee80211/ */ + struct dentry *debugfsdir; + + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); +}; + +/** + * wiphy_priv - return priv from wiphy + * + * @wiphy: the wiphy whose priv pointer to return + */ +static inline void *wiphy_priv(struct wiphy *wiphy) +{ + BUG_ON(!wiphy); + return &wiphy->priv; +} + +/** + * set_wiphy_dev - set device pointer for wiphy + * + * @wiphy: The wiphy whose device to bind + * @dev: The device to parent it to + */ +static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev) +{ + wiphy->dev.parent = dev; +} + +/** + * wiphy_dev - get wiphy dev pointer + * + * @wiphy: The wiphy whose device struct to look up + */ +static inline struct device *wiphy_dev(struct wiphy *wiphy) +{ + return wiphy->dev.parent; +} + +/** + * wiphy_name - get wiphy name + * + * @wiphy: The wiphy whose name to return + */ +static inline const char *wiphy_name(struct wiphy *wiphy) +{ + return dev_name(&wiphy->dev); +} + +/** + * wiphy_new - create a new wiphy for use with cfg80211 + * + * @ops: The configuration operations for this device + * @sizeof_priv: The size of the private area to allocate + * + * Create a new wiphy and associate the given operations with it. + * @sizeof_priv bytes are allocated for private use. + * + * The returned pointer must be assigned to each netdev's + * ieee80211_ptr for proper operation. + */ +struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv); + +/** + * wiphy_register - register a wiphy with cfg80211 + * + * @wiphy: The wiphy to register. + * + * Returns a non-negative wiphy index or a negative error code. + */ +extern int wiphy_register(struct wiphy *wiphy); + +/** + * wiphy_unregister - deregister a wiphy from cfg80211 + * + * @wiphy: The wiphy to unregister. + * + * After this call, no more requests can be made with this priv + * pointer, but the call may sleep to wait for an outstanding + * request that is being handled. + */ +extern void wiphy_unregister(struct wiphy *wiphy); + +/** + * wiphy_free - free wiphy + * + * @wiphy: The wiphy to free + */ +extern void wiphy_free(struct wiphy *wiphy); + +/** + * struct wireless_dev - wireless per-netdev state + * + * This structure must be allocated by the driver/stack + * that uses the ieee80211_ptr field in struct net_device + * (this is intentional so it can be allocated along with + * the netdev.) + * + * @wiphy: pointer to hardware description + * @iftype: interface type + * @list: (private) Used to collect the interfaces + * @netdev: (private) Used to reference back to the netdev + * @current_bss: (private) Used by the internal configuration code + * @bssid: (private) Used by the internal configuration code + * @ssid: (private) Used by the internal configuration code + * @ssid_len: (private) Used by the internal configuration code + * @wext: (private) Used by the internal wireless extensions compat code + * @wext_bssid: (private) Used by the internal wireless extensions compat code + */ +struct wireless_dev { + struct wiphy *wiphy; + enum nl80211_iftype iftype; + + /* private to the generic wireless code */ + struct list_head list; + struct net_device *netdev; + + /* currently used for IBSS - might be rearranged in the future */ + struct cfg80211_bss *current_bss; + u8 bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + +#ifdef CONFIG_WIRELESS_EXT + /* wext data */ + struct cfg80211_ibss_params wext; + u8 wext_bssid[ETH_ALEN]; +#endif +}; + +/** + * wdev_priv - return wiphy priv from wireless_dev + * + * @wdev: The wireless device whose wiphy's priv pointer to return + */ +static inline void *wdev_priv(struct wireless_dev *wdev) +{ + BUG_ON(!wdev); + return wiphy_priv(wdev->wiphy); +} + +/* + * Utility functions + */ + +/** + * ieee80211_channel_to_frequency - convert channel number to frequency + */ +extern int ieee80211_channel_to_frequency(int chan); + +/** + * ieee80211_frequency_to_channel - convert frequency to channel number + */ +extern int ieee80211_frequency_to_channel(int freq); + +/* + * Name indirection necessary because the ieee80211 code also has + * a function named "ieee80211_get_channel", so if you include + * cfg80211's header file you get cfg80211's version, if you try + * to include both header files you'll (rightfully!) get a symbol + * clash. + */ +extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, + int freq); +/** + * ieee80211_get_channel - get channel struct from wiphy for specified frequency + */ +static inline struct ieee80211_channel * +ieee80211_get_channel(struct wiphy *wiphy, int freq) +{ + return __ieee80211_get_channel(wiphy, freq); +} + +/** + * ieee80211_get_response_rate - get basic rate for a given rate + * + * @sband: the band to look for rates in + * @basic_rates: bitmap of basic rates + * @bitrate: the bitrate for which to find the basic rate + * + * This function returns the basic rate corresponding to a given + * bitrate, that is the next lower bitrate contained in the basic + * rate map, which is, for this function, given as a bitmap of + * indices of rates in the band's bitrate table. + */ +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u32 basic_rates, int bitrate); + +/* + * Radiotap parsing functions -- for controlled injection support + * + * Implemented in net/wireless/radiotap.c + * Documentation in Documentation/networking/radiotap-headers.txt + */ + +/** + * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args + * @rtheader: pointer to the radiotap header we are walking through + * @max_length: length of radiotap header in cpu byte ordering + * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg + * @this_arg: pointer to current radiotap arg + * @arg_index: internal next argument index + * @arg: internal next argument pointer + * @next_bitmap: internal pointer to next present u32 + * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + */ + +struct ieee80211_radiotap_iterator { + struct ieee80211_radiotap_header *rtheader; + int max_length; + int this_arg_index; + u8 *this_arg; + + int arg_index; + u8 *arg; + __le32 *next_bitmap; + u32 bitmap_shifter; +}; + +extern int ieee80211_radiotap_iterator_init( + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length); + +extern int ieee80211_radiotap_iterator_next( + struct ieee80211_radiotap_iterator *iterator); + +/* + * Regulatory helper functions for wiphys + */ + +/** + * regulatory_hint - driver hint to the wireless core a regulatory domain + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) + * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain + * should be in. If @rd is set this should be NULL. Note that if you + * set this to NULL you should still set rd->alpha2 to some accepted + * alpha2. + * + * Wireless drivers can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried + * for a regulatory domain structure for the respective country. + * + * The wiphy must have been registered to cfg80211 prior to this call. + * For cfg80211 drivers this means you must first use wiphy_register(), + * for mac80211 drivers you must first use ieee80211_register_hw(). + * + * Drivers should check the return value, its possible you can get + * an -ENOMEM. + */ +extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2); + +/** + * regulatory_hint_11d - hints a country IE as a regulatory domain + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) + * @country_ie: pointer to the country IE + * @country_ie_len: length of the country IE + * + * We will intersect the rd with the what CRDA tells us should apply + * for the alpha2 this country IE belongs to, this prevents APs from + * sending us incorrect or outdated information against a country. + */ +extern void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len); +/** + * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain + * @wiphy: the wireless device we want to process the regulatory domain on + * @regd: the custom regulatory domain to use for this wiphy + * + * Drivers can sometimes have custom regulatory domains which do not apply + * to a specific country. Drivers can use this to apply such custom regulatory + * domains. This routine must be called prior to wiphy registration. The + * custom regulatory domain will be trusted completely and as such previous + * default channel settings will be disregarded. If no rule is found for a + * channel on the regulatory domain the channel will be disabled. + */ +extern void wiphy_apply_custom_regulatory( + struct wiphy *wiphy, + const struct ieee80211_regdomain *regd); + +/** + * freq_reg_info - get regulatory information for the given frequency + * @wiphy: the wiphy for which we want to process this rule for + * @center_freq: Frequency in KHz for which we want regulatory information for + * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one + * you can set this to 0. If this frequency is allowed we then set + * this value to the maximum allowed bandwidth. + * @reg_rule: the regulatory rule which we have for this frequency + * + * Use this function to get the regulatory rule for a specific frequency on + * a given wireless device. If the device has a specific regulatory domain + * it wants to follow we respect that unless a country IE has been received + * and processed already. + * + * Returns 0 if it was able to find a valid regulatory rule which does + * apply to the given center_freq otherwise it returns non-zero. It will + * also return -ERANGE if we determine the given center_freq does not even have + * a regulatory rule for a frequency range in the center_freq's band. See + * freq_in_rule_band() for our current definition of a band -- this is purely + * subjective and right now its 802.11 specific. + */ +extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule); + +/* + * Temporary wext handlers & helper functions + * + * In the future cfg80211 will simply assign the entire wext handler + * structure to netdevs it manages, but we're not there yet. + */ int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra); @@ -892,10 +1342,14 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra); -/* wext helper for now (to be removed) */ struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); +/* + * callbacks for asynchronous cfg80211 methods, notification + * functions and BSS handling helpers + */ + /** * cfg80211_scan_done - notify that scan finished * @@ -949,6 +1403,7 @@ struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy, const u8 *meshid, size_t meshidlen, const u8 *meshcfg); void cfg80211_put_bss(struct cfg80211_bss *bss); + /** * cfg80211_unlink_bss - unlink BSS from internal data structures * @wiphy: the wiphy diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 52808bdcc6c..d9686917252 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -19,7 +19,6 @@ #include #include #include -#include #include /** diff --git a/include/net/regulatory.h b/include/net/regulatory.h new file mode 100644 index 00000000000..47995b81c5d --- /dev/null +++ b/include/net/regulatory.h @@ -0,0 +1,101 @@ +#ifndef __NET_REGULATORY_H +#define __NET_REGULATORY_H +/* + * regulatory support structures + * + * Copyright 2008-2009 Luis R. Rodriguez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +/** + * enum environment_cap - Environment parsed from country IE + * @ENVIRON_ANY: indicates country IE applies to both indoor and + * outdoor operation. + * @ENVIRON_INDOOR: indicates country IE applies only to indoor operation + * @ENVIRON_OUTDOOR: indicates country IE applies only to outdoor operation + */ +enum environment_cap { + ENVIRON_ANY, + ENVIRON_INDOOR, + ENVIRON_OUTDOOR, +}; + +/** + * struct regulatory_request - used to keep track of regulatory requests + * + * @wiphy_idx: this is set if this request's initiator is + * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This + * can be used by the wireless core to deal with conflicts + * and potentially inform users of which devices specifically + * cased the conflicts. + * @initiator: indicates who sent this request, could be any of + * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) + * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested + * regulatory domain. We have a few special codes: + * 00 - World regulatory domain + * 99 - built by driver but a specific alpha2 cannot be determined + * 98 - result of an intersection between two regulatory domains + * @intersect: indicates whether the wireless core should intersect + * the requested regulatory domain with the presently set regulatory + * domain. + * @country_ie_checksum: checksum of the last processed and accepted + * country IE + * @country_ie_env: lets us know if the AP is telling us we are outdoor, + * indoor, or if it doesn't matter + * @list: used to insert into the reg_requests_list linked list + */ +struct regulatory_request { + int wiphy_idx; + enum nl80211_reg_initiator initiator; + char alpha2[2]; + bool intersect; + u32 country_ie_checksum; + enum environment_cap country_ie_env; + struct list_head list; +}; + +struct ieee80211_freq_range { + u32 start_freq_khz; + u32 end_freq_khz; + u32 max_bandwidth_khz; +}; + +struct ieee80211_power_rule { + u32 max_antenna_gain; + u32 max_eirp; +}; + +struct ieee80211_reg_rule { + struct ieee80211_freq_range freq_range; + struct ieee80211_power_rule power_rule; + u32 flags; +}; + +struct ieee80211_regdomain { + u32 n_reg_rules; + char alpha2[2]; + struct ieee80211_reg_rule reg_rules[]; +}; + +#define MHZ_TO_KHZ(freq) ((freq) * 1000) +#define KHZ_TO_MHZ(freq) ((freq) / 1000) +#define DBI_TO_MBI(gain) ((gain) * 100) +#define MBI_TO_DBI(gain) ((gain) / 100) +#define DBM_TO_MBM(gain) ((gain) * 100) +#define MBM_TO_DBM(gain) ((gain) / 100) + +#define REG_RULE(start, end, bw, gain, eirp, reg_flags) \ +{ \ + .freq_range.start_freq_khz = MHZ_TO_KHZ(start), \ + .freq_range.end_freq_khz = MHZ_TO_KHZ(end), \ + .freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \ + .power_rule.max_antenna_gain = DBI_TO_MBI(gain),\ + .power_rule.max_eirp = DBM_TO_MBM(eirp), \ + .flags = reg_flags, \ +} + +#endif diff --git a/include/net/wireless.h b/include/net/wireless.h deleted file mode 100644 index abd27b03333..00000000000 --- a/include/net/wireless.h +++ /dev/null @@ -1,492 +0,0 @@ -#ifndef __NET_WIRELESS_H -#define __NET_WIRELESS_H - -/* - * 802.11 device management - * - * Copyright 2007 Johannes Berg - */ - -#include -#include -#include -#include -#include - -/** - * enum ieee80211_band - supported frequency bands - * - * The bands are assigned this way because the supported - * bitrates differ in these bands. - * - * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band - * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) - */ -enum ieee80211_band { - IEEE80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ, - - /* keep last */ - IEEE80211_NUM_BANDS -}; - -/** - * enum ieee80211_channel_flags - channel flags - * - * Channel flags set by the regulatory control code. - * - * @IEEE80211_CHAN_DISABLED: This channel is disabled. - * @IEEE80211_CHAN_PASSIVE_SCAN: Only passive scanning is permitted - * on this channel. - * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel. - * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. - * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel - * is not permitted. - * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel - * is not permitted. - */ -enum ieee80211_channel_flags { - IEEE80211_CHAN_DISABLED = 1<<0, - IEEE80211_CHAN_PASSIVE_SCAN = 1<<1, - IEEE80211_CHAN_NO_IBSS = 1<<2, - IEEE80211_CHAN_RADAR = 1<<3, - IEEE80211_CHAN_NO_FAT_ABOVE = 1<<4, - IEEE80211_CHAN_NO_FAT_BELOW = 1<<5, -}; - -/** - * struct ieee80211_channel - channel definition - * - * This structure describes a single channel for use - * with cfg80211. - * - * @center_freq: center frequency in MHz - * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz - * @hw_value: hardware-specific value for the channel - * @flags: channel flags from &enum ieee80211_channel_flags. - * @orig_flags: channel flags at registration time, used by regulatory - * code to support devices with additional restrictions - * @band: band this channel belongs to. - * @max_antenna_gain: maximum antenna gain in dBi - * @max_power: maximum transmission power (in dBm) - * @beacon_found: helper to regulatory code to indicate when a beacon - * has been found on this channel. Use regulatory_hint_found_beacon() - * to enable this, this is is useful only on 5 GHz band. - * @orig_mag: internal use - * @orig_mpwr: internal use - */ -struct ieee80211_channel { - enum ieee80211_band band; - u16 center_freq; - u8 max_bandwidth; - u16 hw_value; - u32 flags; - int max_antenna_gain; - int max_power; - bool beacon_found; - u32 orig_flags; - int orig_mag, orig_mpwr; -}; - -/** - * enum ieee80211_rate_flags - rate flags - * - * Hardware/specification flags for rates. These are structured - * in a way that allows using the same bitrate structure for - * different bands/PHY modes. - * - * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short - * preamble on this bitrate; only relevant in 2.4GHz band and - * with CCK rates. - * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate - * when used with 802.11a (on the 5 GHz band); filled by the - * core code when registering the wiphy. - * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate - * when used with 802.11b (on the 2.4 GHz band); filled by the - * core code when registering the wiphy. - * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate - * when used with 802.11g (on the 2.4 GHz band); filled by the - * core code when registering the wiphy. - * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode. - */ -enum ieee80211_rate_flags { - IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, - IEEE80211_RATE_MANDATORY_A = 1<<1, - IEEE80211_RATE_MANDATORY_B = 1<<2, - IEEE80211_RATE_MANDATORY_G = 1<<3, - IEEE80211_RATE_ERP_G = 1<<4, -}; - -/** - * struct ieee80211_rate - bitrate definition - * - * This structure describes a bitrate that an 802.11 PHY can - * operate with. The two values @hw_value and @hw_value_short - * are only for driver use when pointers to this structure are - * passed around. - * - * @flags: rate-specific flags - * @bitrate: bitrate in units of 100 Kbps - * @hw_value: driver/hardware value for this rate - * @hw_value_short: driver/hardware value for this rate when - * short preamble is used - */ -struct ieee80211_rate { - u32 flags; - u16 bitrate; - u16 hw_value, hw_value_short; -}; - -/** - * struct ieee80211_sta_ht_cap - STA's HT capabilities - * - * This structure describes most essential parameters needed - * to describe 802.11n HT capabilities for an STA. - * - * @ht_supported: is HT supported by the STA - * @cap: HT capabilities map as described in 802.11n spec - * @ampdu_factor: Maximum A-MPDU length factor - * @ampdu_density: Minimum A-MPDU spacing - * @mcs: Supported MCS rates - */ -struct ieee80211_sta_ht_cap { - u16 cap; /* use IEEE80211_HT_CAP_ */ - bool ht_supported; - u8 ampdu_factor; - u8 ampdu_density; - struct ieee80211_mcs_info mcs; -}; - -/** - * struct ieee80211_supported_band - frequency band definition - * - * This structure describes a frequency band a wiphy - * is able to operate in. - * - * @channels: Array of channels the hardware can operate in - * in this band. - * @band: the band this structure represents - * @n_channels: Number of channels in @channels - * @bitrates: Array of bitrates the hardware can operate with - * in this band. Must be sorted to give a valid "supported - * rates" IE, i.e. CCK rates first, then OFDM. - * @n_bitrates: Number of bitrates in @bitrates - */ -struct ieee80211_supported_band { - struct ieee80211_channel *channels; - struct ieee80211_rate *bitrates; - enum ieee80211_band band; - int n_channels; - int n_bitrates; - struct ieee80211_sta_ht_cap ht_cap; -}; - -/** - * struct wiphy - wireless hardware description - * @idx: the wiphy index assigned to this item - * @class_dev: the class device representing /sys/class/ieee80211/ - * @custom_regulatory: tells us the driver for this device - * has its own custom regulatory domain and cannot identify the - * ISO / IEC 3166 alpha2 it belongs to. When this is enabled - * we will disregard the first regulatory hint (when the - * initiator is %REGDOM_SET_BY_CORE). - * @strict_regulatory: tells us the driver for this device will ignore - * regulatory domain settings until it gets its own regulatory domain - * via its regulatory_hint(). After its gets its own regulatory domain - * it will only allow further regulatory domain settings to further - * enhance compliance. For example if channel 13 and 14 are disabled - * by this regulatory domain no user regulatory domain can enable these - * channels at a later time. This can be used for devices which do not - * have calibration information gauranteed for frequencies or settings - * outside of its regulatory domain. - * @reg_notifier: the driver's regulatory notification callback - * @regd: the driver's regulatory domain, if one was requested via - * the regulatory_hint() API. This can be used by the driver - * on the reg_notifier() if it chooses to ignore future - * regulatory domain changes caused by other drivers. - * @signal_type: signal type reported in &struct cfg80211_bss. - * @cipher_suites: supported cipher suites - * @n_cipher_suites: number of supported cipher suites - */ -struct wiphy { - /* assign these fields before you register the wiphy */ - - /* permanent MAC address */ - u8 perm_addr[ETH_ALEN]; - - /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ - u16 interface_modes; - - bool custom_regulatory; - bool strict_regulatory; - - enum cfg80211_signal_type signal_type; - - int bss_priv_size; - u8 max_scan_ssids; - u16 max_scan_ie_len; - - int n_cipher_suites; - const u32 *cipher_suites; - - /* If multiple wiphys are registered and you're handed e.g. - * a regular netdev with assigned ieee80211_ptr, you won't - * know whether it points to a wiphy your driver has registered - * or not. Assign this to something global to your driver to - * help determine whether you own this wiphy or not. */ - void *privid; - - struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; - - /* Lets us get back the wiphy on the callback */ - int (*reg_notifier)(struct wiphy *wiphy, - struct regulatory_request *request); - - /* fields below are read-only, assigned by cfg80211 */ - - const struct ieee80211_regdomain *regd; - - /* the item in /sys/class/ieee80211/ points to this, - * you need use set_wiphy_dev() (see below) */ - struct device dev; - - /* dir in debugfs: ieee80211/ */ - struct dentry *debugfsdir; - - char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); -}; - -/** struct wireless_dev - wireless per-netdev state - * - * This structure must be allocated by the driver/stack - * that uses the ieee80211_ptr field in struct net_device - * (this is intentional so it can be allocated along with - * the netdev.) - * - * @wiphy: pointer to hardware description - * @iftype: interface type - * @list: (private) - * @netdev (private) - */ -struct wireless_dev { - struct wiphy *wiphy; - enum nl80211_iftype iftype; - - /* private to the generic wireless code */ - struct list_head list; - struct net_device *netdev; - - /* currently used for IBSS - might be rearranged in the future */ - struct cfg80211_bss *current_bss; - u8 bssid[ETH_ALEN]; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len; - -#ifdef CONFIG_WIRELESS_EXT - /* wext data */ - struct cfg80211_ibss_params wext; - u8 wext_bssid[ETH_ALEN]; -#endif -}; - -/** - * wiphy_priv - return priv from wiphy - */ -static inline void *wiphy_priv(struct wiphy *wiphy) -{ - BUG_ON(!wiphy); - return &wiphy->priv; -} - -/** - * set_wiphy_dev - set device pointer for wiphy - */ -static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev) -{ - wiphy->dev.parent = dev; -} - -/** - * wiphy_dev - get wiphy dev pointer - */ -static inline struct device *wiphy_dev(struct wiphy *wiphy) -{ - return wiphy->dev.parent; -} - -/** - * wiphy_name - get wiphy name - */ -static inline const char *wiphy_name(struct wiphy *wiphy) -{ - return dev_name(&wiphy->dev); -} - -/** - * wdev_priv - return wiphy priv from wireless_dev - */ -static inline void *wdev_priv(struct wireless_dev *wdev) -{ - BUG_ON(!wdev); - return wiphy_priv(wdev->wiphy); -} - -/** - * wiphy_new - create a new wiphy for use with cfg80211 - * - * create a new wiphy and associate the given operations with it. - * @sizeof_priv bytes are allocated for private use. - * - * the returned pointer must be assigned to each netdev's - * ieee80211_ptr for proper operation. - */ -struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv); - -/** - * wiphy_register - register a wiphy with cfg80211 - * - * register the given wiphy - * - * Returns a non-negative wiphy index or a negative error code. - */ -extern int wiphy_register(struct wiphy *wiphy); - -/** - * wiphy_unregister - deregister a wiphy from cfg80211 - * - * unregister a device with the given priv pointer. - * After this call, no more requests can be made with this priv - * pointer, but the call may sleep to wait for an outstanding - * request that is being handled. - */ -extern void wiphy_unregister(struct wiphy *wiphy); - -/** - * wiphy_free - free wiphy - */ -extern void wiphy_free(struct wiphy *wiphy); - -/** - * ieee80211_channel_to_frequency - convert channel number to frequency - */ -extern int ieee80211_channel_to_frequency(int chan); - -/** - * ieee80211_frequency_to_channel - convert frequency to channel number - */ -extern int ieee80211_frequency_to_channel(int freq); - -/* - * Name indirection necessary because the ieee80211 code also has - * a function named "ieee80211_get_channel", so if you include - * cfg80211's header file you get cfg80211's version, if you try - * to include both header files you'll (rightfully!) get a symbol - * clash. - */ -extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, - int freq); -/** - * ieee80211_get_channel - get channel struct from wiphy for specified frequency - */ -static inline struct ieee80211_channel * -ieee80211_get_channel(struct wiphy *wiphy, int freq) -{ - return __ieee80211_get_channel(wiphy, freq); -} - -/** - * ieee80211_get_response_rate - get basic rate for a given rate - * - * @sband: the band to look for rates in - * @basic_rates: bitmap of basic rates - * @bitrate: the bitrate for which to find the basic rate - * - * This function returns the basic rate corresponding to a given - * bitrate, that is the next lower bitrate contained in the basic - * rate map, which is, for this function, given as a bitmap of - * indices of rates in the band's bitrate table. - */ -struct ieee80211_rate * -ieee80211_get_response_rate(struct ieee80211_supported_band *sband, - u32 basic_rates, int bitrate); - -/** - * regulatory_hint - driver hint to the wireless core a regulatory domain - * @wiphy: the wireless device giving the hint (used only for reporting - * conflicts) - * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain - * should be in. If @rd is set this should be NULL. Note that if you - * set this to NULL you should still set rd->alpha2 to some accepted - * alpha2. - * - * Wireless drivers can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by - * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in or by providing a completely build regulatory domain. - * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried - * for a regulatory domain structure for the respective country. - * - * The wiphy must have been registered to cfg80211 prior to this call. - * For cfg80211 drivers this means you must first use wiphy_register(), - * for mac80211 drivers you must first use ieee80211_register_hw(). - * - * Drivers should check the return value, its possible you can get - * an -ENOMEM. - */ -extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2); - -/** - * regulatory_hint_11d - hints a country IE as a regulatory domain - * @wiphy: the wireless device giving the hint (used only for reporting - * conflicts) - * @country_ie: pointer to the country IE - * @country_ie_len: length of the country IE - * - * We will intersect the rd with the what CRDA tells us should apply - * for the alpha2 this country IE belongs to, this prevents APs from - * sending us incorrect or outdated information against a country. - */ -extern void regulatory_hint_11d(struct wiphy *wiphy, - u8 *country_ie, - u8 country_ie_len); -/** - * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain - * @wiphy: the wireless device we want to process the regulatory domain on - * @regd: the custom regulatory domain to use for this wiphy - * - * Drivers can sometimes have custom regulatory domains which do not apply - * to a specific country. Drivers can use this to apply such custom regulatory - * domains. This routine must be called prior to wiphy registration. The - * custom regulatory domain will be trusted completely and as such previous - * default channel settings will be disregarded. If no rule is found for a - * channel on the regulatory domain the channel will be disabled. - */ -extern void wiphy_apply_custom_regulatory( - struct wiphy *wiphy, - const struct ieee80211_regdomain *regd); - -/** - * freq_reg_info - get regulatory information for the given frequency - * @wiphy: the wiphy for which we want to process this rule for - * @center_freq: Frequency in KHz for which we want regulatory information for - * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one - * you can set this to 0. If this frequency is allowed we then set - * this value to the maximum allowed bandwidth. - * @reg_rule: the regulatory rule which we have for this frequency - * - * Use this function to get the regulatory rule for a specific frequency on - * a given wireless device. If the device has a specific regulatory domain - * it wants to follow we respect that unless a country IE has been received - * and processed already. - * - * Returns 0 if it was able to find a valid regulatory rule which does - * apply to the given center_freq otherwise it returns non-zero. It will - * also return -ERANGE if we determine the given center_freq does not even have - * a regulatory rule for a frequency range in the center_freq's band. See - * freq_in_rule_band() for our current definition of a band -- this is purely - * subjective and right now its 802.11 specific. - */ -extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, - const struct ieee80211_reg_rule **reg_rule); - -#endif /* __NET_WIRELESS_H */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 73bd427750e..0891bfb0699 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -14,7 +14,6 @@ */ #include -#include #include #include "ieee80211_i.h" #include "rate.h" diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e770c1e8e08..92a573bf103 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include "key.h" diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 5f7a2624ed7..48bf78e7fa7 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include "ieee80211_i.h" #include "sta_info.h" diff --git a/net/wireless/core.c b/net/wireless/core.c index de1ac51ae4e..827a5626355 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -14,7 +14,6 @@ #include #include #include -#include #include "nl80211.h" #include "core.h" #include "sysfs.h" diff --git a/net/wireless/core.h b/net/wireless/core.h index 2ef3595fd6e..89a8159ef0b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "reg.h" diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 2bf42fdef3a..4e1fcb0c9e4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -7,7 +7,6 @@ #include #include #include -#include #include "nl80211.h" diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 574e217bcc8..f38cc39fa79 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "core.h" #include "reg.h" diff --git a/net/wireless/util.c b/net/wireless/util.c index 487cdd9bcff..5f7e997195c 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1,10 +1,10 @@ /* * Wireless utility functions * - * Copyright 2007 Johannes Berg + * Copyright 2007-2009 Johannes Berg */ -#include -#include +#include +#include #include "core.h" struct ieee80211_rate * diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 57eaea26b67..4e054ea9c0a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "core.h" -- cgit v1.2.3-70-g09d2 From b9a5f8cab751d362f7c2d94899ca788c22fcd1ef Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 20 Apr 2009 18:39:05 +0200 Subject: nl80211: Add set/get for frag/rts threshold and retry limits Add new nl80211 attributes that can be used with NL80211_CMD_SET_WIPHY and NL80211_CMD_GET_WIPHY to manage fragmentation/RTS threshold and retry limits. Since these values are stored in struct wiphy, remove the local copy from mac80211 where feasible (frag & rts threshold). The retry limits are currently needed in struct ieee80211_conf, but these could be eventually removed since the driver should have access to the values in struct wiphy. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 24 ++++++- include/net/cfg80211.h | 50 +++++++++++++++ net/mac80211/cfg.c | 27 ++++++++ net/mac80211/debugfs.c | 8 +-- net/mac80211/ieee80211_i.h | 3 - net/mac80211/main.c | 6 +- net/mac80211/tx.c | 9 ++- net/mac80211/util.c | 2 +- net/mac80211/wext.c | 138 ++--------------------------------------- net/wireless/core.c | 10 +++ net/wireless/nl80211.c | 95 ++++++++++++++++++++++++++++ net/wireless/wext-compat.c | 151 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 372 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 25ce3e42bd1..dc9d9ec5d1a 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -46,8 +46,10 @@ * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, - * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or - * %NL80211_ATTR_WIPHY_CHANNEL_TYPE. + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, + * %NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT, + * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -337,6 +339,18 @@ enum nl80211_commands { * NL80211_CHAN_HT20 = HT20 only * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel + * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is + * less than or equal to the RTS threshold; allowed range: 1..255; + * dot11ShortRetryLimit; u8 + * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is + * greater than the RTS threshold; allowed range: 1..255; + * dot11ShortLongLimit; u8 + * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum + * length in octets for frames; allowed range: 256..8000, disable + * fragmentation with (u32)-1; dot11FragmentationThreshold; u32 + * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length + * larger than or equal to this use RTS/CTS handshake); allowed range: + * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -565,6 +579,12 @@ enum nl80211_attrs { NL80211_ATTR_FREQ_FIXED, + + NL80211_ATTR_WIPHY_RETRY_SHORT, + NL80211_ATTR_WIPHY_RETRY_LONG, + NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + NL80211_ATTR_WIPHY_RTS_THRESHOLD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 601eac64b02..54bc69c8369 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -743,6 +743,20 @@ struct cfg80211_ibss_params { bool channel_fixed; }; +/** + * enum wiphy_params_flags - set_wiphy_params bitfield values + * WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed + * WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed + * WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed + * WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed + */ +enum wiphy_params_flags { + WIPHY_PARAM_RETRY_SHORT = 1 << 0, + WIPHY_PARAM_RETRY_LONG = 1 << 1, + WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, + WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -823,6 +837,11 @@ struct cfg80211_ibss_params { * cfg80211_ibss_joined(), also call that function when changing BSSID due * to a merge. * @leave_ibss: Leave the IBSS. + * + * @set_wiphy_params: Notify that wiphy parameters have changed; + * @changed bitfield (see &enum wiphy_params_flags) describes which values + * have changed. The actual parameter values are available in + * struct wiphy. If returning an error, no value should be changed. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -912,6 +931,8 @@ struct cfg80211_ops { int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); + + int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); }; /* @@ -945,6 +966,11 @@ struct cfg80211_ops { * @signal_type: signal type reported in &struct cfg80211_bss. * @cipher_suites: supported cipher suites * @n_cipher_suites: number of supported cipher suites + * @retry_short: Retry limit for short frames (dot11ShortRetryLimit) + * @retry_long: Retry limit for long frames (dot11LongRetryLimit) + * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); + * -1 = fragmentation disabled, only odd values >= 256 used + * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -967,6 +993,11 @@ struct wiphy { int n_cipher_suites; const u32 *cipher_suites; + u8 retry_short; + u8 retry_long; + u32 frag_threshold; + u32 rts_threshold; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered @@ -1345,6 +1376,25 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); +int cfg80211_wext_siwrts(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rts, char *extra); +int cfg80211_wext_giwrts(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rts, char *extra); +int cfg80211_wext_siwfrag(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *frag, char *extra); +int cfg80211_wext_giwfrag(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *frag, char *extra); +int cfg80211_wext_siwretry(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *retry, char *extra); +int cfg80211_wext_giwretry(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *retry, char *extra); + /* * callbacks for asynchronous cfg80211 methods, notification * functions and BSS handling helpers diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 14013dc6447..5e1c230744b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1298,6 +1298,32 @@ static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) return ieee80211_ibss_leave(sdata); } +static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (changed & WIPHY_PARAM_RTS_THRESHOLD) { + int err; + + if (local->ops->set_rts_threshold) { + err = local->ops->set_rts_threshold( + local_to_hw(local), wiphy->rts_threshold); + if (err) + return err; + } + } + + if (changed & WIPHY_PARAM_RETRY_SHORT) + local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; + if (changed & WIPHY_PARAM_RETRY_LONG) + local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; + if (changed & + (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1336,4 +1362,5 @@ struct cfg80211_ops mac80211_config_ops = { .disassoc = ieee80211_disassoc, .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, + .set_wiphy_params = ieee80211_set_wiphy_params, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 210b9b6fecd..5001328be46 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -52,13 +52,13 @@ static const struct file_operations name## _ops = { \ DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.channel->center_freq); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", - local->rts_threshold); + local->hw.wiphy->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", - local->fragmentation_threshold); + local->hw.wiphy->frag_threshold); DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d", - local->hw.conf.short_frame_max_tx_count); + local->hw.wiphy->retry_short); DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", - local->hw.conf.long_frame_max_tx_count); + local->hw.wiphy->retry_long); DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 92a573bf103..dba78d89a10 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -643,9 +643,6 @@ struct ieee80211_local { struct rate_control_ref *rate_ctrl; - int rts_threshold; - int fragmentation_threshold; - struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d26fc399285..5320e08434a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -776,10 +776,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, /* set up some defaults */ local->hw.queues = 1; local->hw.max_rates = 1; - local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; - local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; - local->hw.conf.long_frame_max_tx_count = 4; - local->hw.conf.short_frame_max_tx_count = 7; + local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; + local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->hw.conf.radio_enabled = true; INIT_LIST_HEAD(&local->interfaces); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c53d77db3e4..9ab49826c15 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -516,7 +516,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; len = min_t(int, tx->skb->len + FCS_LEN, - tx->local->fragmentation_threshold); + tx->local->hw.wiphy->frag_threshold); /* set up the tx rate control struct we give the RC algo */ txrc.hw = local_to_hw(tx->local); @@ -527,8 +527,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; /* set up RTS protection if desired */ - if (tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD && - len > tx->local->rts_threshold) { + if (len > tx->local->hw.wiphy->rts_threshold) { txrc.rts = rts = true; } @@ -770,7 +769,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; - int frag_threshold = tx->local->fragmentation_threshold; + int frag_threshold = tx->local->hw.wiphy->frag_threshold; int hdrlen; int fragnum; @@ -1088,7 +1087,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, if (tx->flags & IEEE80211_TX_FRAGMENTED) { if ((tx->flags & IEEE80211_TX_UNICAST) && - skb->len + FCS_LEN > local->fragmentation_threshold && + skb->len + FCS_LEN > local->hw.wiphy->frag_threshold && !(info->flags & IEEE80211_TX_CTL_AMPDU)) tx->flags |= IEEE80211_TX_FRAGMENTED; else diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3dd490fa4b6..11244212f41 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1044,7 +1044,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* setup RTS threshold */ if (local->ops->set_rts_threshold) - local->ops->set_rts_threshold(hw, local->rts_threshold); + local->ops->set_rts_threshold(hw, hw->wiphy->rts_threshold); /* reconfigure hardware */ ieee80211_hw_config(local, ~0); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index eb63fc14801..1eb6d8642a7 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -472,132 +472,6 @@ static int ieee80211_ioctl_giwtxpower(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwrts(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rts, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (rts->disabled) - local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; - else if (!rts->fixed) - /* if the rts value is not fixed, then take default */ - local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; - else if (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD) - return -EINVAL; - else - local->rts_threshold = rts->value; - - /* If the wlan card performs RTS/CTS in hardware/firmware, - * configure it here */ - - if (local->ops->set_rts_threshold) - local->ops->set_rts_threshold(local_to_hw(local), - local->rts_threshold); - - return 0; -} - -static int ieee80211_ioctl_giwrts(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rts, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - rts->value = local->rts_threshold; - rts->disabled = (rts->value >= IEEE80211_MAX_RTS_THRESHOLD); - rts->fixed = 1; - - return 0; -} - - -static int ieee80211_ioctl_siwfrag(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *frag, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (frag->disabled) - local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; - else if (!frag->fixed) - local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; - else if (frag->value < 256 || - frag->value > IEEE80211_MAX_FRAG_THRESHOLD) - return -EINVAL; - else { - /* Fragment length must be even, so strip LSB. */ - local->fragmentation_threshold = frag->value & ~0x1; - } - - return 0; -} - -static int ieee80211_ioctl_giwfrag(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *frag, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - frag->value = local->fragmentation_threshold; - frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD); - frag->fixed = 1; - - return 0; -} - - -static int ieee80211_ioctl_siwretry(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *retry, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (retry->disabled || - (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) - return -EINVAL; - - if (retry->flags & IW_RETRY_MAX) { - local->hw.conf.long_frame_max_tx_count = retry->value; - } else if (retry->flags & IW_RETRY_MIN) { - local->hw.conf.short_frame_max_tx_count = retry->value; - } else { - local->hw.conf.long_frame_max_tx_count = retry->value; - local->hw.conf.short_frame_max_tx_count = retry->value; - } - - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); - - return 0; -} - - -static int ieee80211_ioctl_giwretry(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *retry, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - retry->disabled = 0; - if (retry->flags == 0 || retry->flags & IW_RETRY_MIN) { - /* first return min value, iwconfig will ask max value - * later if needed */ - retry->flags |= IW_RETRY_LIMIT; - retry->value = local->hw.conf.short_frame_max_tx_count; - if (local->hw.conf.long_frame_max_tx_count != - local->hw.conf.short_frame_max_tx_count) - retry->flags |= IW_RETRY_MIN; - return 0; - } - if (retry->flags & IW_RETRY_MAX) { - retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX; - retry->value = local->hw.conf.long_frame_max_tx_count; - } - - return 0; -} - - static int ieee80211_ioctl_siwencode(struct net_device *dev, struct iw_request_info *info, struct iw_point *erq, char *keybuf) @@ -1050,14 +924,14 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* -- hole -- */ (iw_handler) ieee80211_ioctl_siwrate, /* SIOCSIWRATE */ (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ - (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ - (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ - (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ - (iw_handler) ieee80211_ioctl_giwfrag, /* SIOCGIWFRAG */ + (iw_handler) cfg80211_wext_siwrts, /* SIOCSIWRTS */ + (iw_handler) cfg80211_wext_giwrts, /* SIOCGIWRTS */ + (iw_handler) cfg80211_wext_siwfrag, /* SIOCSIWFRAG */ + (iw_handler) cfg80211_wext_giwfrag, /* SIOCGIWFRAG */ (iw_handler) ieee80211_ioctl_siwtxpower, /* SIOCSIWTXPOW */ (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ - (iw_handler) ieee80211_ioctl_siwretry, /* SIOCSIWRETRY */ - (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ + (iw_handler) cfg80211_wext_siwretry, /* SIOCSIWRETRY */ + (iw_handler) cfg80211_wext_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ (iw_handler) ieee80211_ioctl_giwencode, /* SIOCGIWENCODE */ (iw_handler) ieee80211_ioctl_siwpower, /* SIOCSIWPOWER */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 827a5626355..f256b4f7e83 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -273,6 +273,16 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) drv->wiphy.dev.class = &ieee80211_class; drv->wiphy.dev.platform_data = drv; + /* + * Initialize wiphy parameters to IEEE 802.11 MIB default values. + * Fragmentation and RTS threshold are disabled by default with the + * special -1 value. + */ + drv->wiphy.retry_short = 7; + drv->wiphy.retry_long = 4; + drv->wiphy.frag_threshold = (u32) -1; + drv->wiphy.rts_threshold = (u32) -1; + return &drv->wiphy; } EXPORT_SYMBOL(wiphy_new); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 16f86356ac9..5a9a5c6c71d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -61,6 +61,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 }, + [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, + [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -204,6 +208,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, + dev->wiphy.retry_short); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, + dev->wiphy.retry_long); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + dev->wiphy.frag_threshold); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, + dev->wiphy.rts_threshold); + NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -416,6 +430,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev; int result = 0, rem_txq_params = 0; struct nlattr *nl_txq_params; + u32 changed; + u8 retry_short = 0, retry_long = 0; + u32 frag_threshold = 0, rts_threshold = 0; rtnl_lock(); @@ -530,6 +547,84 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + changed = 0; + + if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { + retry_short = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]); + if (retry_short == 0) { + result = -EINVAL; + goto bad_res; + } + changed |= WIPHY_PARAM_RETRY_SHORT; + } + + if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) { + retry_long = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]); + if (retry_long == 0) { + result = -EINVAL; + goto bad_res; + } + changed |= WIPHY_PARAM_RETRY_LONG; + } + + if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { + frag_threshold = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); + if (frag_threshold < 256) { + result = -EINVAL; + goto bad_res; + } + if (frag_threshold != (u32) -1) { + /* + * Fragments (apart from the last one) are required to + * have even length. Make the fragmentation code + * simpler by stripping LSB should someone try to use + * odd threshold value. + */ + frag_threshold &= ~0x1; + } + changed |= WIPHY_PARAM_FRAG_THRESHOLD; + } + + if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) { + rts_threshold = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]); + changed |= WIPHY_PARAM_RTS_THRESHOLD; + } + + if (changed) { + u8 old_retry_short, old_retry_long; + u32 old_frag_threshold, old_rts_threshold; + + if (!rdev->ops->set_wiphy_params) { + result = -EOPNOTSUPP; + goto bad_res; + } + + old_retry_short = rdev->wiphy.retry_short; + old_retry_long = rdev->wiphy.retry_long; + old_frag_threshold = rdev->wiphy.frag_threshold; + old_rts_threshold = rdev->wiphy.rts_threshold; + + if (changed & WIPHY_PARAM_RETRY_SHORT) + rdev->wiphy.retry_short = retry_short; + if (changed & WIPHY_PARAM_RETRY_LONG) + rdev->wiphy.retry_long = retry_long; + if (changed & WIPHY_PARAM_FRAG_THRESHOLD) + rdev->wiphy.frag_threshold = frag_threshold; + if (changed & WIPHY_PARAM_RTS_THRESHOLD) + rdev->wiphy.rts_threshold = rts_threshold; + + result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); + if (result) { + rdev->wiphy.retry_short = old_retry_short; + rdev->wiphy.retry_long = old_retry_long; + rdev->wiphy.frag_threshold = old_frag_threshold; + rdev->wiphy.rts_threshold = old_rts_threshold; + } + } bad_res: mutex_unlock(&rdev->mtx); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 4e054ea9c0a..3279e7f038d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -314,3 +314,154 @@ struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_wext_freq); + +int cfg80211_wext_siwrts(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rts, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + u32 orts = wdev->wiphy->rts_threshold; + int err; + + if (rts->disabled || !rts->fixed) + wdev->wiphy->rts_threshold = (u32) -1; + else if (rts->value < 0) + return -EINVAL; + else + wdev->wiphy->rts_threshold = rts->value; + + err = rdev->ops->set_wiphy_params(wdev->wiphy, + WIPHY_PARAM_RTS_THRESHOLD); + if (err) + wdev->wiphy->rts_threshold = orts; + + return err; +} +EXPORT_SYMBOL(cfg80211_wext_siwrts); + +int cfg80211_wext_giwrts(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rts, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + rts->value = wdev->wiphy->rts_threshold; + rts->disabled = rts->value == (u32) -1; + rts->fixed = 1; + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwrts); + +int cfg80211_wext_siwfrag(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *frag, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + u32 ofrag = wdev->wiphy->frag_threshold; + int err; + + if (frag->disabled || !frag->fixed) + wdev->wiphy->frag_threshold = (u32) -1; + else if (frag->value < 256) + return -EINVAL; + else { + /* Fragment length must be even, so strip LSB. */ + wdev->wiphy->frag_threshold = frag->value & ~0x1; + } + + err = rdev->ops->set_wiphy_params(wdev->wiphy, + WIPHY_PARAM_FRAG_THRESHOLD); + if (err) + wdev->wiphy->frag_threshold = ofrag; + + return err; +} +EXPORT_SYMBOL(cfg80211_wext_siwfrag); + +int cfg80211_wext_giwfrag(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *frag, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + frag->value = wdev->wiphy->frag_threshold; + frag->disabled = frag->value == (u32) -1; + frag->fixed = 1; + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwfrag); + +int cfg80211_wext_siwretry(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *retry, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + u32 changed = 0; + u8 olong = wdev->wiphy->retry_long; + u8 oshort = wdev->wiphy->retry_short; + int err; + + if (retry->disabled || + (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) + return -EINVAL; + + if (retry->flags & IW_RETRY_LONG) { + wdev->wiphy->retry_long = retry->value; + changed |= WIPHY_PARAM_RETRY_LONG; + } else if (retry->flags & IW_RETRY_SHORT) { + wdev->wiphy->retry_short = retry->value; + changed |= WIPHY_PARAM_RETRY_SHORT; + } else { + wdev->wiphy->retry_short = retry->value; + wdev->wiphy->retry_long = retry->value; + changed |= WIPHY_PARAM_RETRY_LONG; + changed |= WIPHY_PARAM_RETRY_SHORT; + } + + if (!changed) + return 0; + + err = rdev->ops->set_wiphy_params(wdev->wiphy, changed); + if (err) { + wdev->wiphy->retry_short = oshort; + wdev->wiphy->retry_long = olong; + } + + return err; +} +EXPORT_SYMBOL(cfg80211_wext_siwretry); + +int cfg80211_wext_giwretry(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *retry, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + retry->disabled = 0; + + if (retry->flags == 0 || (retry->flags & IW_RETRY_SHORT)) { + /* + * First return short value, iwconfig will ask long value + * later if needed + */ + retry->flags |= IW_RETRY_LIMIT; + retry->value = wdev->wiphy->retry_short; + if (wdev->wiphy->retry_long != wdev->wiphy->retry_short) + retry->flags |= IW_RETRY_LONG; + + return 0; + } + + if (retry->flags & IW_RETRY_LONG) { + retry->flags = IW_RETRY_LIMIT | IW_RETRY_LONG; + retry->value = wdev->wiphy->retry_long; + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwretry); -- cgit v1.2.3-70-g09d2 From e7ec86f54e519e8e86f1cf328db13263f3ef8bd4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 18 Apr 2009 17:33:24 +0200 Subject: mac80211: validate TIM IE length (redux) The TIM IE must not be shorter than 4 bytes, so verify that when parsing it and use the proper type. To ease that adjust struct ieee80211_tim_ie to have a virtual bitmap of size at least 1. Also check that the TIM IE is actually present before trying to parse it! Because other people may need the function, make it a static inline in ieee80211.h. (The original "mac80211: validate TIM IE length" was a minimal fix for 2.6.30. This purports to be the full, correct fix. -- JWL) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 32 +++++++++++++++++++++++++++++++- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 27 ++------------------------- net/mac80211/util.c | 6 ++++-- 4 files changed, 38 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 53563d53b5a..c52e7fba4e4 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -540,7 +540,7 @@ struct ieee80211_tim_ie { u8 dtim_period; u8 bitmap_ctrl; /* variable size: 1 - 251 bytes */ - u8 virtual_map[0]; + u8 virtual_map[1]; } __attribute__ ((packed)); #define WLAN_SA_QUERY_TR_ID_LEN 16 @@ -1392,4 +1392,34 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) return 1024 * tu; } +/** + * ieee80211_check_tim - check if AID bit is set in TIM + * @tim: the TIM IE + * @tim_len: length of the TIM IE + * @aid: the AID to look for + */ +static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid) +{ + u8 mask; + u8 index, indexn1, indexn2; + + if (unlikely(!tim || tim_len < sizeof(*tim))) + return false; + + aid &= 0x3fff; + index = aid / 8; + mask = 1 << (aid & 7); + + indexn1 = tim->bitmap_ctrl & 0xfe; + indexn2 = tim_len + indexn1 - 4; + + if (index < indexn1 || index > indexn2) + return false; + + index -= indexn1; + + return !!(tim->virtual_map[index] & mask); +} + #endif /* LINUX_IEEE80211_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index dba78d89a10..1579bc92c88 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -832,7 +832,7 @@ struct ieee802_11_elems { u8 *fh_params; u8 *ds_params; u8 *cf_params; - u8 *tim; + struct ieee80211_tim_ie *tim; u8 *ibss_params; u8 *challenge; u8 *wpa; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 428742d7f44..1b0b7aa387e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -675,30 +675,6 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } } -static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) -{ - u8 mask; - u8 index, indexn1, indexn2; - struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; - - if (unlikely(!tim || elems->tim_len < 4)) - return false; - - aid &= 0x3fff; - index = aid / 8; - mask = 1 << (aid & 7); - - indexn1 = tim->bitmap_ctrl & 0xfe; - indexn2 = elems->tim_len + indexn1 - 4; - - if (index < indexn1 || index > indexn2) - return false; - - index -= indexn1; - - return !!(tim->virtual_map[index] & mask); -} - static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, u16 capab, bool erp_valid, u8 erp) { @@ -1806,7 +1782,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, care_about_ies, ncrc); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) - directed_tim = ieee80211_check_tim(&elems, ifmgd->aid); + directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len, + ifmgd->aid); ncrc = crc32_be(ncrc, (void *)&directed_tim, sizeof(directed_tim)); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 11244212f41..61876eb50b4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -588,8 +588,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->cf_params_len = elen; break; case WLAN_EID_TIM: - elems->tim = pos; - elems->tim_len = elen; + if (elen >= sizeof(struct ieee80211_tim_ie)) { + elems->tim = (void *)pos; + elems->tim_len = elen; + } break; case WLAN_EID_IBSS_PARAMS: elems->ibss_params = pos; -- cgit v1.2.3-70-g09d2 From e255d5eb2b478eec1416b46aea03798b64355402 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Apr 2009 12:40:07 +0200 Subject: mac80211: remove IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT Just setting IEEE80211_CONF_CHANGE_PS should be sufficient for changes in the power saving things. The driver already tells us whether it wants notification of dynps via the "have dynps support" hw flag. Signed-off-by: Johannes Berg Reviewed-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ++++------ net/mac80211/wext.c | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9686917252..183956e4930 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -532,8 +532,7 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed - * @IEEE80211_CONF_CHANGE_PS: the PS flag changed - * @IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT: the dynamic PS timeout changed + * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed @@ -544,10 +543,9 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), - IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT = BIT(5), - IEEE80211_CONF_CHANGE_POWER = BIT(6), - IEEE80211_CONF_CHANGE_CHANNEL = BIT(7), - IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(8), + IEEE80211_CONF_CHANGE_POWER = BIT(5), + IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), + IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), }; /** diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 1eb6d8642a7..1a649da42c4 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -622,8 +622,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, conf->dynamic_ps_timeout = timeout; if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) - ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local, -1); -- cgit v1.2.3-70-g09d2 From 8e30bc55de98c000b0b836cb42525c82f605f191 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Apr 2009 17:45:38 +0200 Subject: nl80211: allow configuring IBSS beacon interval Make the JOIN_IBSS command look at the beacon interval attribute to see if the user requested a specific beacon interval, if not default to 100 TU (wext too). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 +++- include/net/cfg80211.h | 2 ++ net/wireless/ibss.c | 3 +++ net/wireless/nl80211.c | 12 +++++++++++- 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index dc9d9ec5d1a..b6a48dd502c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -230,7 +230,9 @@ * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those * should be fixed rather than automatically determined. Can only be * executed on a network interface that is UP, and fixed BSSID/FREQ - * may be rejected. + * may be rejected. Another optional parameter is the beacon interval, + * given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not + * given defaults to 100 TU (102.4ms). * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is * determined by the network interface. * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 54bc69c8369..7f7b53b69cb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -733,6 +733,7 @@ struct cfg80211_disassoc_request { * IBSSs to join on other channels. * @ie: information element(s) to include in the beacon * @ie_len: length of that + * @beacon_interval: beacon interval to use */ struct cfg80211_ibss_params { u8 *ssid; @@ -740,6 +741,7 @@ struct cfg80211_ibss_params { struct ieee80211_channel *channel; u8 *ie; u8 ssid_len, ie_len; + u16 beacon_interval; bool channel_fixed; }; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index b5c601e1b1b..3c38afaed28 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -116,6 +116,9 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, enum ieee80211_band band; int i; + if (!wdev->wext.beacon_interval) + wdev->wext.beacon_interval = 100; + /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.channel) { for (band = 0; band < IEEE80211_NUM_BANDS; band++) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 97bb5c80125..3b21b3e89e9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3159,6 +3159,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) struct wiphy *wiphy; int err; + memset(&ibss, 0, sizeof(ibss)); + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3167,6 +3169,15 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) !nla_len(info->attrs[NL80211_ATTR_SSID])) return -EINVAL; + ibss.beacon_interval = 100; + + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + ibss.beacon_interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000) + return -EINVAL; + } + rtnl_lock(); err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); @@ -3189,7 +3200,6 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } wiphy = &drv->wiphy; - memset(&ibss, 0, sizeof(ibss)); if (info->attrs[NL80211_ATTR_MAC]) ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); -- cgit v1.2.3-70-g09d2 From 04fe20372e70685d9f15966216cdffd3795fe590 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Apr 2009 18:44:37 +0200 Subject: mac80211: calculate maximum sleep interval The maximum sleep interval, for powersave purposes, is determined by the DTIM period (it may not be larger) and the required networking latency (it must be small enough to fulfil those constraints). This makes mac80211 calculate the maximum sleep interval based on those constraints, and pass it to the driver. Then the driver should instruct the device to sleep at most that long. Note that the device is responsible for aligning the maximum sleep interval between DTIMs, we make sure it's not longer but it needs to make sure it's between them. Also, group some powersave documentation together and make it more explicit that we support managed mode only, and no IBSS powersaving (yet). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 19 ++++++++++++++++--- net/mac80211/mlme.c | 20 ++++++++++++++++---- 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 183956e4930..446dbf75a1c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -517,7 +517,7 @@ struct ieee80211_rx_status { * Flags to define PHY configuration options * * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) - * @IEEE80211_CONF_PS: Enable 802.11 power save mode + * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only) */ enum ieee80211_conf_flags { IEEE80211_CONF_RADIOTAP = (1<<0), @@ -553,14 +553,26 @@ enum ieee80211_conf_changed { * * This struct indicates how the driver shall configure the hardware. * + * @flags: configuration flags defined above + * * @radio_enabled: when zero, driver is required to switch off the radio. * @beacon_int: beacon interval (TODO make interface config) + * * @listen_interval: listen interval in units of beacon interval - * @flags: configuration flags defined above + * @max_sleep_interval: the maximum number of beacon intervals to sleep for + * before checking the beacon for a TIM bit (managed mode only); this + * value will be only achievable between DTIM frames, the hardware + * needs to check for the multicast traffic bit in DTIM beacons. + * This variable is valid only when the CONF_PS flag is set. + * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the + * powersave documentation below. This variable is valid only when + * the CONF_PS flag is set. + * * @power_level: requested transmit power (in dBm) - * @dynamic_ps_timeout: dynamic powersave timeout (in ms) + * * @channel: the channel to tune to * @channel_type: the channel (HT) type + * * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame * (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11, * but actually means the number of transmissions not the number of retries @@ -572,6 +584,7 @@ struct ieee80211_conf { int beacon_int; u32 flags; int power_level, dynamic_ps_timeout; + int max_sleep_interval; u16 listen_interval; bool radio_enabled; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e819c02d13f..df27c68620c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -545,10 +545,19 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); - if (beaconint_us > latency) + if (beaconint_us > latency) { local->ps_sdata = NULL; - else + } else { + u8 dtimper = found->vif.bss_conf.dtim_period; + int maxslp = 1; + + if (dtimper > 1) + maxslp = min_t(int, dtimper, + latency / beaconint_us); + + local->hw.conf.max_sleep_interval = maxslp; local->ps_sdata = found; + } } else { local->ps_sdata = NULL; } @@ -851,8 +860,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, bss_info_changed); /* will be same as sdata */ - if (local->ps_sdata) - ieee80211_enable_ps(local, sdata); + if (local->ps_sdata) { + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + } netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); -- cgit v1.2.3-70-g09d2 From 1965c85331ed29dc4fd32479ff31663e3e9a518f Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 22 Apr 2009 21:38:25 +0300 Subject: nl80211: Add event for authentication/association timeout SME needs to be notified when the authentication or association attempt times out and MLME has stopped processing in order to allow the SME to decide what to do next. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 13 +++++++++++-- include/net/cfg80211.h | 22 ++++++++++++++++++++-- net/mac80211/mlme.c | 4 ++-- net/wireless/mlme.c | 27 +++++++++++++++++++++++++++ net/wireless/nl80211.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 6 ++++++ 6 files changed, 115 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b6a48dd502c..e9fd13aa79f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -203,8 +203,12 @@ * frame, i.e., it was for the local STA and was received in correct * state. This is similar to MLME-AUTHENTICATE.confirm primitive in the * MLME SAP interface (kernel providing MLME, userspace SME). The - * included NL80211_ATTR_FRAME attribute contains the management frame - * (including both the header and frame body, but not FCS). + * included %NL80211_ATTR_FRAME attribute contains the management frame + * (including both the header and frame body, but not FCS). This event is + * also used to indicate if the authentication attempt timed out. In that + * case the %NL80211_ATTR_FRAME attribute is replaced with a + * %NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which + * pending authentication timed out). * @NL80211_CMD_ASSOCIATE: association request and notification; like * NL80211_CMD_AUTHENTICATE but for Association and Reassociation * (similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request, @@ -487,6 +491,9 @@ enum nl80211_commands { * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look * for other networks on different channels * + * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this + * is used, e.g., with %NL80211_CMD_AUTHENTICATE event + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -587,6 +594,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_FRAG_THRESHOLD, NL80211_ATTR_WIPHY_RTS_THRESHOLD, + NL80211_ATTR_TIMED_OUT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7f7b53b69cb..b8a76764e1c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1475,10 +1475,19 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); * @len: length of the frame data * * This function is called whenever an authentication has been processed in - * station mode. + * station mode. The driver is required to call either this function or + * cfg80211_send_auth_timeout() to indicate the result of cfg80211_ops::auth() + * call. */ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); +/** + * cfg80211_send_auth_timeout - notification of timed out authentication + * @dev: network device + * @addr: The MAC address of the device with which the authentication timed out + */ +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr); + /** * cfg80211_send_rx_assoc - notification of processed association * @dev: network device @@ -1486,10 +1495,19 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); * @len: length of the frame data * * This function is called whenever a (re)association response has been - * processed in station mode. + * processed in station mode. The driver is required to call either this + * function or cfg80211_send_assoc_timeout() to indicate the result of + * cfg80211_ops::assoc() call. */ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len); +/** + * cfg80211_send_assoc_timeout - notification of timed out association + * @dev: network device + * @addr: The MAC address of the device with which the association timed out + */ +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr); + /** * cfg80211_send_deauth - notification of processed deauthentication * @dev: network device diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index df27c68620c..3610c11286b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -932,7 +932,7 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) " timed out\n", sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_sta_send_apinfo(sdata); + cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); @@ -1115,7 +1115,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) " timed out\n", sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_sta_send_apinfo(sdata); + cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1407244a647..42184361a10 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -44,6 +44,33 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); +static void cfg80211_wext_disconnected(struct net_device *dev) +{ +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; + memset(&wrqu, 0, sizeof(wrqu)); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} + +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_auth_timeout(rdev, dev, addr); + cfg80211_wext_disconnected(dev); +} +EXPORT_SYMBOL(cfg80211_send_auth_timeout); + +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_assoc_timeout(rdev, dev, addr); + cfg80211_wext_disconnected(dev); +} +EXPORT_SYMBOL(cfg80211_send_assoc_timeout); + void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3b21b3e89e9..b1fc98225fd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -121,6 +121,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, + [NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG }, }; /* IE validation */ @@ -3695,6 +3696,54 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE); } +void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, + struct net_device *netdev, int cmd, + const u8 *addr) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_FLAG(msg, NL80211_ATTR_TIMED_OUT); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *addr) +{ + nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_AUTHENTICATE, + addr); +} + +void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *addr) +{ + nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, addr); +} + void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 17d2d8bfaf7..5c12ad13499 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -23,6 +23,12 @@ extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev, extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len); +extern void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *addr); +extern void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *addr); extern void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, -- cgit v1.2.3-70-g09d2 From 9d5c5d8f4105dc56ec10864b195dd1714f282c22 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 23:26:22 +0000 Subject: af_iucv: add sockopt() to enable/disable use of IPRM_DATA msgs Provide the socket operations getsocktopt() and setsockopt() to enable/disable sending of data in the parameter list of IUCV messages. The patch sets respective flag only. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + include/net/iucv/af_iucv.h | 4 +++ net/iucv/af_iucv.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 81 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 42a0396f2c5..d2310cb45d2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -303,6 +303,7 @@ struct ucred { #define SOL_BLUETOOTH 274 #define SOL_PNPIPE 275 #define SOL_RDS 276 +#define SOL_IUCV 277 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 85f80eadfa3..78a72764aef 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -73,8 +73,12 @@ struct iucv_sock { struct sk_buff_head backlog_skb_q; struct sock_msg_q message_q; unsigned int send_tag; + u8 flags; }; +/* iucv socket options (SOL_IUCV) */ +#define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ + struct iucv_sock_list { struct hlist_head head; rwlock_t lock; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6cf02b41ef9..b7c40c97992 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -32,7 +32,7 @@ #define CONFIG_IUCV_SOCK_DEBUG 1 #define IPRMDATA 0x80 -#define VERSION "1.0" +#define VERSION "1.1" static char iucv_userid[80]; @@ -226,6 +226,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->flags = 0; sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -1003,6 +1004,78 @@ static int iucv_sock_release(struct socket *sock) return err; } +/* getsockopt and setsockopt */ +static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val; + int rc; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + rc = 0; + + lock_sock(sk); + switch (optname) { + case SO_IPRMDATA_MSG: + if (val) + iucv->flags |= IUCV_IPRMDATA; + else + iucv->flags &= ~IUCV_IPRMDATA; + break; + default: + rc = -ENOPROTOOPT; + break; + } + release_sock(sk); + + return rc; +} + +static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val, len; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < 0) + return -EINVAL; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case SO_IPRMDATA_MSG: + val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + + /* Callback wrappers - called from iucv base support */ static int iucv_callback_connreq(struct iucv_path *path, u8 ipvmid[8], u8 ipuser[16]) @@ -1229,8 +1302,8 @@ static struct proto_ops iucv_sock_ops = { .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = iucv_sock_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt + .setsockopt = iucv_sock_setsockopt, + .getsockopt = iucv_sock_getsockopt, }; static struct net_proto_family iucv_sock_family_ops = { -- cgit v1.2.3-70-g09d2 From 44b1e6b5f9a93cc2ba024e09cf137d5f1b5f8426 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 23:26:24 +0000 Subject: af_iucv: Modify iucv msg target class using control msghdr Allow 'classification' of socket data that is sent or received over an af_iucv socket. For classification of data, the target class of an (native) iucv message is used. This patch provides the cmsg interface for iucv_sock_recvmsg() and iucv_sock_sendmsg(). Applications can use the msg_control field of struct msghdr to set or get the target class as a "socket control message" (SCM/CMSG). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 3 ++ net/iucv/af_iucv.c | 79 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 78a72764aef..b57739e49dc 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -79,6 +79,9 @@ struct iucv_sock { /* iucv socket options (SOL_IUCV) */ #define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ +/* iucv related control messages (scm) */ +#define SCM_IUCV_TRGCLS 0x0001 /* target class control message */ + struct iucv_sock_list { struct hlist_head head; rwlock_t lock; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 5fc077ee583..47c5c8d3703 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -45,6 +45,15 @@ static struct proto iucv_proto = { static const u8 iprm_shutdown[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; +#define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) + +/* macros to set/get socket control buffer at correct offset */ +#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ +#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) +#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ +#define CB_TRGCLS_LEN (TRGCLS_SIZE) + + static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -698,6 +707,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; struct iucv_message txmsg; + struct cmsghdr *cmsg; + int cmsg_done; char user_id[9]; char appl_id[9]; int err; @@ -717,6 +728,48 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } if (sk->sk_state == IUCV_CONNECTED) { + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; + + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } + + if (cmsg->cmsg_level != SOL_IUCV) + continue; + + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; + + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { + err = -EINVAL; + goto out; + } + + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + + break; + + default: + err = -EINVAL; + goto out; + break; + } + } + if (!(skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, &err))) @@ -727,10 +780,9 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } - txmsg.class = 0; - memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len); + /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(skb->cb, &txmsg.tag, 4); + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); skb_queue_tail(&iucv->send_skb_q, skb); if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) @@ -801,6 +853,10 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) if (!nskb) return -ENOMEM; + /* copy target class to control buffer of new skb */ + memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + + /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); copied += size; dataleft -= size; @@ -824,6 +880,10 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, len = iucv_msg_length(msg); + /* store msg target class in the second 4 bytes of skb ctrl buffer */ + /* Note: the first 4 bytes are reserved for msg tag */ + memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { if (memcmp(msg->rmmsg, iprm_shutdown, 8) == 0) { @@ -915,6 +975,17 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, len -= copied; + /* create control message to store iucv msg target class: + * get the trgcls from the control buffer of the skb due to + * fragmentation of original iucv message. */ + err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, + CB_TRGCLS_LEN, CB_TRGCLS(skb)); + if (err) { + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return err; + } + /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { skb_pull(skb, copied); @@ -1316,7 +1387,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, list_skb->cb, 4)) { + if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { this = list_skb; break; } -- cgit v1.2.3-70-g09d2 From 09488e2e0fab14ebe41135f0d066cfe2c56ba9e5 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 23:26:27 +0000 Subject: af_iucv: New socket option for setting IUCV MSGLIMITs The SO_MSGLIMIT socket option modifies the message limit for new IUCV communication paths. The message limit specifies the maximum number of outstanding messages that are allowed for connections. This setting can be lowered by z/VM when an IUCV connection is established. Expects an integer value in the range of 1 to 65535. The default value is 65535. The message limit must be set before calling connect() or listen() for sockets. If sockets are already connected or in state listen, changing the message limit is not supported. For reading the message limit value, unconnected sockets return the limit that has been set or the default limit. For connected sockets, the actual message limit is returned. The actual message limit is assigned by z/VM for each connection and it depends on IUCV MSGLIMIT authorizations specified for the z/VM guest virtual machine. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 2 ++ net/iucv/af_iucv.c | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index b57739e49dc..21ee49ffcba 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -74,10 +74,12 @@ struct iucv_sock { struct sock_msg_q message_q; unsigned int send_tag; u8 flags; + u16 msglimit; }; /* iucv socket options (SOL_IUCV) */ #define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ +#define SO_MSGLIMIT 0x1000 /* get/set IUCV MSGLIMIT */ /* iucv related control messages (scm) */ #define SCM_IUCV_TRGCLS 0x0001 /* target class control message */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index f0dea1b8ed4..264c6b36931 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -268,6 +268,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; iucv_sk(sk)->flags = 0; + iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -536,7 +537,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, iucv = iucv_sk(sk); /* Create path. */ - iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, + iucv->path = iucv_path_alloc(iucv->msglimit, IUCV_IPRMDATA, GFP_KERNEL); if (!iucv->path) { err = -ENOMEM; @@ -1219,6 +1220,20 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, else iucv->flags &= ~IUCV_IPRMDATA; break; + case SO_MSGLIMIT: + switch (sk->sk_state) { + case IUCV_OPEN: + case IUCV_BOUND: + if (val < 1 || val > (u16)(~0)) + rc = -EINVAL; + else + iucv->msglimit = val; + break; + default: + rc = -EINVAL; + break; + } + break; default: rc = -ENOPROTOOPT; break; @@ -1250,6 +1265,12 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, case SO_IPRMDATA_MSG: val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; break; + case SO_MSGLIMIT: + lock_sock(sk); + val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ + : iucv->msglimit; /* default */ + release_sock(sk); + break; default: return -ENOPROTOOPT; } @@ -1339,7 +1360,9 @@ static int iucv_callback_connreq(struct iucv_path *path, memcpy(nuser_data + 8, niucv->src_name, 8); ASCEBC(nuser_data + 8, 8); - path->msglim = IUCV_QUEUELEN_DEFAULT; + /* set message limit for path based on msglimit of accepting socket */ + niucv->msglimit = iucv->msglimit; + path->msglim = iucv->msglimit; err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); -- cgit v1.2.3-70-g09d2 From 89ec0dee9eba6275d47be0b878cf5f6d5c2fb6eb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Mar 2009 11:03:29 -0400 Subject: tracing: increase size of number of possible events With the new event tracing registration, we must increase the number of events that can be registered. Currently the type field is only one byte, which leaves us only 256 possible events. Since we do not save the CPU number in the tracer anymore (it is determined by the per cpu ring buffer that is used) we have an extra byte to use. This patch increases the size of type from 1 byte (256 events) to 2 bytes (65,536 events). It also adds a WARN_ON_ONCE if we exceed that limit. [ Impact: allow more than 255 events ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 5 ++++- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_output.c | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2a4a4074991..07e0a6d64a2 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,13 +16,16 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - int type; + unsigned short type; unsigned char flags; unsigned char preempt_count; int pid; int tgid; }; +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5d6e879cf87..9887131afa0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(int, type), + FIELD(unsigned short, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 83a8abb9640..06997e75114 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -537,6 +537,8 @@ int register_ftrace_event(struct trace_event *event) out: mutex_unlock(&trace_event_mutex); + WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT); + return ret; } EXPORT_SYMBOL_GPL(register_ftrace_event); -- cgit v1.2.3-70-g09d2 From c2518c4366f087ebc10b3919cb2461bbe4f42d0c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Apr 2009 23:26:18 -0400 Subject: tracing: fix cut and paste macro error In case a module uses the TRACE_EVENT macro for creating automated events in ftrace, it may choose to use a different file name than the defined system name, or choose to use a different path than the default "include/trace/events" include path. If this is done, then before including trace/define_trace.h the header would define either "TRACE_INCLUDE_FILE" for the file name or "TRACE_INCLUDE_PATH" for the include path. If it does not define these, then the define_trace.h defines them instead. If define trace defines them, then define_trace.h should also undefine them before exiting. To do this a macro is used to note this: #ifndef TRACE_INCLUDE_FILE # define TRACE_INCLUDE_FILE TRACE_SYSTEM # define UNDEF_TRACE_INCLUDE_FILE #endif [...] #ifdef UNDEF_TRACE_INCLUDE_FILE # undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif The UNDEF_TRACE_INCLUDE_FILE acts as a CPP variable to know to undef the TRACE_INCLUDE_FILE before leaving define_trace.h. Unfortunately, due to cut and paste errors, the macros between FILE and PATH got mixed up. [ Impact: undef TRACE_INCLUDE_FILE and/or TRACE_INCLUDE_PATH when needed ] Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 7f1f23d601e..abc611feeb8 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -44,7 +44,7 @@ #ifndef TRACE_INCLUDE_PATH # define __TRACE_INCLUDE(system) -# define UNDEF_TRACE_INCLUDE_FILE +# define UNDEF_TRACE_INCLUDE_PATH #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) #endif @@ -64,13 +64,13 @@ /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE -# undef TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif -#ifdef UNDEF_TRACE_INCLUDE_FILE +#ifdef UNDEF_TRACE_INCLUDE_PATH # undef TRACE_INCLUDE_PATH -# undef UNDEF_TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_PATH #endif /* We may be processing more files */ -- cgit v1.2.3-70-g09d2 From 334d4169a6592d3fcd863bbe822a8f6985ffa9af Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 24 Apr 2009 11:27:05 +0800 Subject: ring_buffer: compressed event header RB_MAX_SMALL_DATA = 28bytes is too small for most tracers, it wastes an 'u32' to save the actually length for events which data size > 28. This fix uses compressed event header and enlarges RB_MAX_SMALL_DATA. [ Impact: saves about 0%-12.5%(depends on tracer) memory in ring_buffer ] Signed-off-by: Lai Jiangshan LKML-Reference: <49F13189.3090000@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 16 +++++---- kernel/trace/ring_buffer.c | 83 ++++++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fac8f1ac6f4..1c2f80911fb 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -11,7 +11,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - u32 type:2, len:3, time_delta:27; + u32 type_len:5, time_delta:27; u32 array[]; }; @@ -24,7 +24,8 @@ struct ring_buffer_event { * size is variable depending on how much * padding is needed * If time_delta is non zero: - * everything else same as RINGBUF_TYPE_DATA + * array[0] holds the actual length + * size = 4 + length (bytes) * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -35,22 +36,23 @@ struct ring_buffer_event { * array[1..2] = tv_sec * size = 16 bytes * - * @RINGBUF_TYPE_DATA: Data record - * If len is zero: + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record + * If type_len is zero: * array[0] holds the actual length * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * size = 4 + length (bytes) * else - * length = len << 2 + * length = type_len << 2 * array[0..(length+3)/4-1] holds data * size = 4 + length (bytes) */ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, - RINGBUF_TYPE_DATA, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 61dbdf21cd3..9692f100ec1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -28,8 +28,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) { int ret; - ret = trace_seq_printf(s, "\ttype : 2 bits\n"); - ret = trace_seq_printf(s, "\tlen : 3 bits\n"); + ret = trace_seq_printf(s, "# compressed entry header\n"); + ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); ret = trace_seq_printf(s, "\tarray : 32 bits\n"); ret = trace_seq_printf(s, "\n"); @@ -37,8 +37,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) RINGBUF_TYPE_PADDING); ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", RINGBUF_TYPE_TIME_EXTEND); - ret = trace_seq_printf(s, "\tdata : type == %d\n", - RINGBUF_TYPE_DATA); + ret = trace_seq_printf(s, "\tdata max type_len == %d\n", + RINGBUF_TYPE_DATA_TYPE_LEN_MAX); return ret; } @@ -204,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U -#define RB_MAX_SMALL_DATA 28 +#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) + +/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ +#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX enum { RB_LEN_TIME_EXTEND = 8, @@ -213,17 +216,18 @@ enum { static inline int rb_null_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; + return event->type_len == RINGBUF_TYPE_PADDING + && event->time_delta == 0; } static inline int rb_discarded_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta; + return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; } static void rb_event_set_padding(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; + event->type_len = RINGBUF_TYPE_PADDING; event->time_delta = 0; } @@ -232,8 +236,8 @@ rb_event_data_length(struct ring_buffer_event *event) { unsigned length; - if (event->len) - length = event->len * RB_ALIGNMENT; + if (event->type_len) + length = event->type_len * RB_ALIGNMENT; else length = event->array[0]; return length + RB_EVNT_HDR_SIZE; @@ -243,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event) static unsigned rb_event_length(struct ring_buffer_event *event) { - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) /* undefined */ return -1; - return rb_event_data_length(event); + return event->array[0] + RB_EVNT_HDR_SIZE; case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; @@ -272,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event) unsigned ring_buffer_event_length(struct ring_buffer_event *event) { unsigned length = rb_event_length(event); - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) @@ -285,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); static void * rb_event_data(struct ring_buffer_event *event) { - BUG_ON(event->type != RINGBUF_TYPE_DATA); + BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ - if (event->len) + if (event->type_len) return (void *)&event->array[0]; /* Otherwise length is in array[0] and array[1] has the data */ return (void *)&event->array[1]; @@ -988,7 +992,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) return; /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) continue; cpu_buffer->overrun++; cpu_buffer->entries--; @@ -1133,28 +1137,21 @@ static void rb_update_event(struct ring_buffer_event *event, unsigned type, unsigned length) { - event->type = type; + event->type_len = type; switch (type) { case RINGBUF_TYPE_PADDING: - break; - case RINGBUF_TYPE_TIME_EXTEND: - event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); - break; - case RINGBUF_TYPE_TIME_STAMP: - event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); break; - case RINGBUF_TYPE_DATA: + case 0: length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) { - event->len = 0; + if (length > RB_MAX_SMALL_DATA) event->array[0] = length; - } else - event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); + else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); break; default: BUG(); @@ -1562,7 +1559,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) if (length > BUF_PAGE_SIZE) goto out; - event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); + event = rb_reserve_next_event(cpu_buffer, 0, length); if (!event) goto out; @@ -1634,7 +1631,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); static inline void rb_event_discard(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; + /* array[0] holds the actual length for the discarded event */ + event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; + event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ if (!event->time_delta) event->time_delta = 1; @@ -1786,8 +1785,7 @@ int ring_buffer_write(struct ring_buffer *buffer, goto out; event_length = rb_calculate_event_length(length); - event = rb_reserve_next_event(cpu_buffer, - RINGBUF_TYPE_DATA, event_length); + event = rb_reserve_next_event(cpu_buffer, 0, event_length); if (!event) goto out; @@ -2035,7 +2033,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -2066,7 +2064,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -2181,7 +2179,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) event = rb_reader_event(cpu_buffer); - if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) + if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX + || rb_discarded_event(event)) cpu_buffer->entries--; rb_update_read_stamp(cpu_buffer, event); @@ -2262,7 +2261,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_reader_event(cpu_buffer); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) RB_WARN_ON(cpu_buffer, 1); @@ -2334,7 +2333,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_head_event(iter); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) { rb_inc_iter(iter); @@ -2393,7 +2392,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_buffer_peek(buffer, cpu, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2421,7 +2420,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_peek(iter, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2466,7 +2465,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) out: preempt_enable(); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2559,7 +2558,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2766,7 +2765,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) return; /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) continue; cpu_buffer->entries--; } -- cgit v1.2.3-70-g09d2 From 1cb81b143fa8f0e4629f10690862e2e52ca792ff Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 24 Apr 2009 09:51:43 +0200 Subject: x86, bts, mm: clean up buffer allocation The current mm interface is asymetric. One function allocates a locked buffer, another function only refunds the memory. Change this to have two functions for accounting and refunding locked memory, respectively; and do the actual buffer allocation in ptrace. [ Impact: refactor BTS buffer allocation code ] Signed-off-by: Markus Metzger Acked-by: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <20090424095143.A30265@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 39 ++++++++++++++++++++++++++------------- include/linux/mm.h | 6 ++++-- mm/mlock.c | 36 +++++++++++++++++------------------- 3 files changed, 47 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d5252ae6c52..09ecbde91c1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -617,17 +617,28 @@ struct bts_context { struct work_struct work; }; -static inline void alloc_bts_buffer(struct bts_context *context, - unsigned int size) +static int alloc_bts_buffer(struct bts_context *context, unsigned int size) { - void *buffer; + void *buffer = NULL; + int err = -ENOMEM; - buffer = alloc_locked_buffer(size); - if (buffer) { - context->buffer = buffer; - context->size = size; - context->mm = get_task_mm(current); - } + err = account_locked_memory(current->mm, current->signal->rlim, size); + if (err < 0) + return err; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out_refund; + + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); + + return 0; + + out_refund: + refund_locked_memory(current->mm, size); + return err; } static inline void free_bts_buffer(struct bts_context *context) @@ -638,7 +649,7 @@ static inline void free_bts_buffer(struct bts_context *context) kfree(context->buffer); context->buffer = NULL; - refund_locked_buffer_memory(context->mm, context->size); + refund_locked_memory(context->mm, context->size); context->size = 0; mmput(context->mm); @@ -786,13 +797,15 @@ static int ptrace_bts_config(struct task_struct *child, context->tracer = NULL; if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + int err; + free_bts_buffer(context); if (!cfg.size) return 0; - alloc_bts_buffer(context, cfg.size); - if (!context->buffer) - return -ENOMEM; + err = alloc_bts_buffer(context, cfg.size); + if (err < 0) + return err; } if (cfg.flags & PTRACE_BTS_O_TRACE) diff --git a/include/linux/mm.h b/include/linux/mm.h index a3963ba23a6..009eabd3c21 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -19,6 +19,7 @@ struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; +struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1319,7 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern void *alloc_locked_buffer(size_t size); -extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); +extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size); +extern void refund_locked_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 28be15ead9c..ac130433c7d 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -629,38 +629,36 @@ void user_shm_unlock(size_t size, struct user_struct *user) free_uid(user); } -void *alloc_locked_buffer(size_t size) +int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size) { - unsigned long rlim, vm, pgsz; - void *buffer = NULL; + unsigned long lim, vm, pgsz; + int error = -ENOMEM; pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + pgsz; - if (rlim < vm) - goto out; + down_write(&mm->mmap_sem); - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + pgsz; - if (rlim < vm) + lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = mm->total_vm + pgsz; + if (lim < vm) goto out; - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) + lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = mm->locked_vm + pgsz; + if (lim < vm) goto out; - current->mm->total_vm += pgsz; - current->mm->locked_vm += pgsz; + mm->total_vm += pgsz; + mm->locked_vm += pgsz; + error = 0; out: - up_write(¤t->mm->mmap_sem); - return buffer; + up_write(&mm->mmap_sem); + return error; } -void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) +void refund_locked_memory(struct mm_struct *mm, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; -- cgit v1.2.3-70-g09d2 From 39517091f88fae32b52254b561ced78da1eaf0a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:05:52 -0400 Subject: tracing/lockdep: convert lockdep to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/trace/events/lockdep.h | 56 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 3ca315c1429..0e956c9dfd7 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -9,28 +9,64 @@ #ifdef CONFIG_LOCKDEP -TRACE_FORMAT(lock_acquire, +TRACE_EVENT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); -TRACE_FORMAT(lock_release, + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); #ifdef CONFIG_LOCK_STAT -TRACE_FORMAT(lock_contended, +TRACE_EVENT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); TRACE_EVENT(lock_acquired, TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), -- cgit v1.2.3-70-g09d2 From 160031b556e93590fa8635210d73d93c3d3853a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:26:55 -0400 Subject: tracing/irq: convert irq traces to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Jason Baron Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 57 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 75e3468e449..76868646751 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -10,11 +10,24 @@ /* * Tracepoint for entry of interrupt handler: */ -TRACE_FORMAT(irq_handler_entry, +TRACE_EVENT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); + + TP_STRUCT__entry( + __field( int, irq ) + __string( name, action->name ) + ), + + TP_fast_assign( + __entry->irq = irq; + __assign_str(name, action->name); + ), + + TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) +); /* * Tracepoint for return of an interrupt handler: @@ -39,17 +52,43 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -TRACE_FORMAT(softirq_entry, +TRACE_EVENT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); -TRACE_FORMAT(softirq_exit, + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); + +TRACE_EVENT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); + + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3-70-g09d2 From b8e65554d80b4c560d201362d0e8fa02109d89fd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:50:39 -0400 Subject: tracing: remove deprecated TRACE_FORMAT The TRACE_FORMAT macro has been deprecated by the TRACE_EVENT macro. There are no more users. All new users must use the TRACE_EVENT macro. [ Impact: remove old functionality ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 ---- include/trace/define_trace.h | 4 --- include/trace/ftrace.h | 66 -------------------------------------------- 3 files changed, 75 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4353f3f7e62..14df7e635d4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -158,11 +158,6 @@ static inline void tracepoint_synchronize_unregister(void) #define PARAMS(args...) args -#ifndef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index abc611feeb8..f7a7ae1e8f9 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,10 +26,6 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) -#undef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, print) \ - DEFINE_TRACE(name) - #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a77f71a46db..1e681142f1d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,9 +18,6 @@ #include -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) type item[len]; @@ -62,9 +59,6 @@ * */ -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) @@ -298,16 +292,6 @@ ftrace_define_fields_##call(void) \ * unregister_trace_(ftrace_event_); * } * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * } - * * * For those macros defined with TRACE_EVENT: * @@ -417,56 +401,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ #define _TRACE_PROFILE_INIT(call) #endif -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - #undef __entry #define __entry entry -- cgit v1.2.3-70-g09d2 From 7629ad24f2b3df95c8b4cd8869e3c04e1df6c442 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 24 Apr 2009 16:37:44 +0200 Subject: ASoC: add SOC_DOUBLE_EXT macro Add a macro for double controls with special callback functions. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- include/sound/soc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index b1f2f8819fe..6ab80bf7abd 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -118,6 +118,14 @@ .info = snd_soc_info_volsw, \ .get = xhandler_get, .put = xhandler_put, \ .private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) } +#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\ + xhandler_get, xhandler_put) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\ + .info = snd_soc_info_volsw, \ + .get = xhandler_get, .put = xhandler_put, \ + .private_value = (unsigned long)&(struct soc_mixer_control) \ + {.reg = xreg, .shift = shift_left, .rshift = shift_right, \ + .max = xmax, .invert = xinvert} } #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\ xhandler_get, xhandler_put, tlv_array) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ -- cgit v1.2.3-70-g09d2 From a9e61e25f9d2e7e43bf17625f5cb56c9e0a89b17 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Tue, 31 Mar 2009 15:12:56 -0500 Subject: lockd: call locks_release_private to cleanup per-filesystem state For every lock request lockd creates a new file_lock object in nlmsvc_setgrantargs() by copying the passed in file_lock with locks_copy_lock(). A filesystem can attach it's own lock_operations vector to the file_lock. It has to be cleaned up at the end of the file_lock's life. However, lockd doesn't do it today, yet it asserts in nlmclnt_release_lockargs() that the per-filesystem state is clean. This patch fixes it by exporting locks_release_private() and adding it to nlmsvc_freegrantargs(), to be symmetrical to creating a file_lock in nlmsvc_setgrantargs(). Signed-off-by: Felix Blyakher Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 2 ++ fs/locks.c | 3 ++- include/linux/fs.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 83ee34203bd..e577a78d7ba 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call) { if (call->a_args.lock.oh.data != call->a_owner) kfree(call->a_args.lock.oh.data); + + locks_release_private(&call->a_args.lock.fl); } /* diff --git a/fs/locks.c b/fs/locks.c index ec3deea29e3..b6440f52178 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void) return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } -static void locks_release_private(struct file_lock *fl) +void locks_release_private(struct file_lock *fl) { if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) @@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl) } } +EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ static void locks_free_lock(struct file_lock *fl) diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f435..5ba615e8f53 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1108,6 +1108,7 @@ extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); +extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -- cgit v1.2.3-70-g09d2 From 060fa5c83e67901ba47ab484cfcdb32737d630ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 12:20:52 -0400 Subject: tracing/events: reuse trace event ids after overflow With modules being able to add trace events, and the max trace event counter is 16 bits (65536) we can overflow the counter easily with a simple while loop adding and removing modules that contain trace events. This patch links together the registered trace events and on overflow searches for available trace event ids. It will still fail if over 65536 events are registered, but considering that a typical kernel only has 22000 functions, 65000 events should be sufficient. Reported-by: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace_output.c | 71 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 07e0a6d64a2..78a9ba24cbf 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -56,6 +56,7 @@ typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, int flags); struct trace_event { struct hlist_node node; + struct list_head list; int type; trace_print_func trace; trace_print_func raw; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 06997e75114..5fc51f0f75f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -483,6 +483,36 @@ struct trace_event *ftrace_find_event(int type) return NULL; } +static LIST_HEAD(ftrace_event_list); + +static int trace_search_list(struct list_head **list) +{ + struct trace_event *e; + int last = __TRACE_LAST_TYPE; + + if (list_empty(&ftrace_event_list)) { + *list = &ftrace_event_list; + return last + 1; + } + + /* + * We used up all possible max events, + * lets see if somebody freed one. + */ + list_for_each_entry(e, &ftrace_event_list, list) { + if (e->type != last + 1) + break; + last++; + } + + /* Did we used up all 65 thousand events??? */ + if ((last + 1) > FTRACE_MAX_EVENT) + return 0; + + *list = &e->list; + return last + 1; +} + /** * register_ftrace_event - register output for an event type * @event: the event type to register @@ -505,20 +535,40 @@ int register_ftrace_event(struct trace_event *event) mutex_lock(&trace_event_mutex); - if (!event) { - ret = next_event_type++; + if (WARN_ON(!event)) goto out; - } - if (!event->type) - event->type = next_event_type++; - else if (event->type > __TRACE_LAST_TYPE) { + INIT_LIST_HEAD(&event->list); + + if (!event->type) { + struct list_head *list; + + if (next_event_type > FTRACE_MAX_EVENT) { + + event->type = trace_search_list(&list); + if (!event->type) + goto out; + + } else { + + event->type = next_event_type++; + list = &ftrace_event_list; + } + + if (WARN_ON(ftrace_find_event(event->type))) + goto out; + + list_add_tail(&event->list, list); + + } else if (event->type > __TRACE_LAST_TYPE) { printk(KERN_WARNING "Need to add type to trace.h\n"); WARN_ON(1); - } - - if (ftrace_find_event(event->type)) goto out; + } else { + /* Is this event already used */ + if (ftrace_find_event(event->type)) + goto out; + } if (event->trace == NULL) event->trace = trace_nop_print; @@ -537,8 +587,6 @@ int register_ftrace_event(struct trace_event *event) out: mutex_unlock(&trace_event_mutex); - WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT); - return ret; } EXPORT_SYMBOL_GPL(register_ftrace_event); @@ -551,6 +599,7 @@ int unregister_ftrace_event(struct trace_event *event) { mutex_lock(&trace_event_mutex); hlist_del(&event->node); + list_del(&event->list); mutex_unlock(&trace_event_mutex); return 0; -- cgit v1.2.3-70-g09d2 From 92ae3efa53b481ad669d8b85c6cfa37d774bb21e Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 27 Apr 2009 02:35:32 -0700 Subject: ipv4: remove unused member in fib_table. This patch removes an unused parameter (tb_stamp) from fib_table structure in include/net/ip_fib.h. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8b12667f7a2..34855eede5f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -145,7 +145,6 @@ struct fib_result_nl { struct fib_table { struct hlist_node tb_hlist; u32 tb_id; - unsigned tb_stamp; int tb_default; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); int (*tb_insert)(struct fib_table *, struct fib_config *); -- cgit v1.2.3-70-g09d2 From edf391ff17232f097d72441c9ad467bcb3b5db18 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Apr 2009 02:45:02 -0700 Subject: snmp: add missing counters for RFC 4293 The IP MIB (RFC 4293) defines stats for InOctets, OutOctets, InMcastOctets and OutMcastOctets: http://tools.ietf.org/html/rfc4293 But it seems we don't track those in any way that easy to separate from other protocols. This patch adds those missing counters to the stats file. Tested successfully by me With help from Eric Dumazet. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/snmp.h | 10 ++++++++-- include/net/ip.h | 3 +++ include/net/ipv6.h | 15 ++++++++++++++- include/net/snmp.h | 19 ++++++++++++++++++- net/ipv4/ip_input.c | 13 ++++++++----- net/ipv4/ip_output.c | 12 ++++++------ net/ipv4/proc.c | 10 ++++++++-- net/ipv6/ip6_input.c | 7 ++++--- net/ipv6/ip6_output.c | 9 +++++---- net/ipv6/mcast.c | 19 ++++++++++++------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/proc.c | 10 ++++++++-- net/ipv6/raw.c | 2 +- 13 files changed, 97 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index aee3f1e1d1c..0f953fe4041 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -18,7 +18,7 @@ enum { IPSTATS_MIB_NUM = 0, - IPSTATS_MIB_INRECEIVES, /* InReceives */ + IPSTATS_MIB_INPKTS, /* InReceives */ IPSTATS_MIB_INHDRERRORS, /* InHdrErrors */ IPSTATS_MIB_INTOOBIGERRORS, /* InTooBigErrors */ IPSTATS_MIB_INNOROUTES, /* InNoRoutes */ @@ -28,7 +28,7 @@ enum IPSTATS_MIB_INDISCARDS, /* InDiscards */ IPSTATS_MIB_INDELIVERS, /* InDelivers */ IPSTATS_MIB_OUTFORWDATAGRAMS, /* OutForwDatagrams */ - IPSTATS_MIB_OUTREQUESTS, /* OutRequests */ + IPSTATS_MIB_OUTPKTS, /* OutRequests */ IPSTATS_MIB_OUTDISCARDS, /* OutDiscards */ IPSTATS_MIB_OUTNOROUTES, /* OutNoRoutes */ IPSTATS_MIB_REASMTIMEOUT, /* ReasmTimeout */ @@ -42,6 +42,12 @@ enum IPSTATS_MIB_OUTMCASTPKTS, /* OutMcastPkts */ IPSTATS_MIB_INBCASTPKTS, /* InBcastPkts */ IPSTATS_MIB_OUTBCASTPKTS, /* OutBcastPkts */ + IPSTATS_MIB_INOCTETS, /* InOctets */ + IPSTATS_MIB_OUTOCTETS, /* OutOctets */ + IPSTATS_MIB_INMCASTOCTETS, /* InMcastOctets */ + IPSTATS_MIB_OUTMCASTOCTETS, /* OutMcastOctets */ + IPSTATS_MIB_INBCASTOCTETS, /* InBcastOctets */ + IPSTATS_MIB_OUTBCASTOCTETS, /* OutBcastOctets */ __IPSTATS_MIB_MAX }; diff --git a/include/net/ip.h b/include/net/ip.h index 4ac7577f98d..72c36926c26 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -168,7 +168,10 @@ struct ipv4_config extern struct ipv4_config ipv4_config; #define IP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.ip_statistics, field) #define IP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.ip_statistics, field) +#define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.ip_statistics, field, val) #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val) +#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS((net)->mib.ip_statistics, field, val) +#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS_BH((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) #define NET_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->mib.net_statistics, field) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c1f16fc49ad..f27fd83d67d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -126,15 +126,28 @@ extern struct ctl_path net_ipv6_ctl_path[]; SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\ }) +#define _DEVUPD(net, statname, modifier, idev, field, val) \ +({ \ + struct inet6_dev *_idev = (idev); \ + if (likely(_idev != NULL)) \ + SNMP_UPD_PO_STATS##modifier((_idev)->stats.statname, field, (val)); \ + SNMP_UPD_PO_STATS##modifier((net)->mib.statname##_statistics, field, (val));\ +}) + /* MIBs */ #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, , idev, field) #define IP6_INC_STATS_BH(net, idev,field) \ _DEVINC(net, ipv6, _BH, idev, field) +#define IP6_ADD_STATS(net, idev,field,val) \ + _DEVADD(net, ipv6, , idev, field, val) #define IP6_ADD_STATS_BH(net, idev,field,val) \ _DEVADD(net, ipv6, _BH, idev, field, val) - +#define IP6_UPD_PO_STATS(net, idev,field,val) \ + _DEVUPD(net, ipv6, , idev, field, val) +#define IP6_UPD_PO_STATS_BH(net, idev,field,val) \ + _DEVUPD(net, ipv6, _BH, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINC(net, icmpv6, , idev, field) #define ICMP6_INC_STATS_BH(net, idev, field) \ diff --git a/include/net/snmp.h b/include/net/snmp.h index 57c93628695..8c842e06bec 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -153,6 +153,11 @@ struct linux_xfrm_mib { per_cpu_ptr(mib[!in_softirq()], get_cpu())->mibs[field]--; \ put_cpu(); \ } while (0) +#define SNMP_ADD_STATS(mib, field, addend) \ + do { \ + per_cpu_ptr(mib[!in_softirq()], get_cpu())->mibs[field] += addend; \ + put_cpu(); \ + } while (0) #define SNMP_ADD_STATS_BH(mib, field, addend) \ (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ @@ -160,5 +165,17 @@ struct linux_xfrm_mib { per_cpu_ptr(mib[1], get_cpu())->mibs[field] += addend; \ put_cpu(); \ } while (0) - +#define SNMP_UPD_PO_STATS(mib, basefield, addend) \ + do { \ + __typeof__(mib[0]) ptr = per_cpu_ptr(mib[!in_softirq()], get_cpu());\ + ptr->mibs[basefield##PKTS]++; \ + ptr->mibs[basefield##OCTETS] += addend;\ + put_cpu(); \ + } while (0) +#define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \ + do { \ + __typeof__(mib[0]) ptr = per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id());\ + ptr->mibs[basefield##PKTS]++; \ + ptr->mibs[basefield##OCTETS] += addend;\ + } while (0) #endif diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1a58a6fa1dc..40f6206b2aa 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,10 +358,12 @@ static int ip_rcv_finish(struct sk_buff *skb) goto drop; rt = skb->rtable; - if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS); - else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS); + if (rt->rt_type == RTN_MULTICAST) { + IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + skb->len); + } else if (rt->rt_type == RTN_BROADCAST) + IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + skb->len); return dst_input(skb); @@ -384,7 +386,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES); + + IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e7e910c7c0..ea19c37ccc0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -181,10 +181,10 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); - if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS); - else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS); + if (rt->rt_type == RTN_MULTICAST) { + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); + } else if (rt->rt_type == RTN_BROADCAST) + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -244,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb) /* * If the indicated interface is up and running, send the packet. */ - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -298,7 +298,7 @@ int ip_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cf0cdeeb1db..f25542c48b7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -90,14 +90,14 @@ static const struct file_operations sockstat_seq_fops = { /* snmp items */ static const struct snmp_mib snmp4_ipstats_list[] = { - SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES), + SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS), SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS), - SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS), + SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS), SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -118,6 +118,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS), SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS), + SNMP_MIB_ITEM("InOctets", IPSTATS_MIB_INOCTETS), + SNMP_MIB_ITEM("OutOctets", IPSTATS_MIB_OUTOCTETS), + SNMP_MIB_ITEM("InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), + SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), + SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), + SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 8f04bd9da27..bc1a920c34a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -70,7 +70,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES); + IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { @@ -242,8 +242,9 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_INC_STATS_BH(dev_net(skb->dst->dev), - ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); + IP6_UPD_PO_STATS_BH(dev_net(skb->dst->dev), + ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCAST, + skb->len); hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9fb49c3b518..735a2bf4b5f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -159,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb) } } - IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, + skb->len); } return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, @@ -275,8 +276,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), - IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUT, skb->len); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -1516,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a51fb33e686..4b48819a5b8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1449,7 +1449,8 @@ static void mld_sendpack(struct sk_buff *skb) int err; struct flowi fl; - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); @@ -1473,13 +1474,15 @@ static void mld_sendpack(struct sk_buff *skb) if (err) goto err_out; + payload_len = skb->len; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); @@ -1773,10 +1776,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPV6_TLV_PADN, 0 }; struct flowi fl; - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTREQUESTS); - rcu_read_unlock(); if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; else @@ -1786,6 +1785,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; + rcu_read_lock(); + IP6_UPD_PO_STATS(net, __in6_dev_get(dev), + IPSTATS_MIB_OUT, full_len); + rcu_read_unlock(); + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { @@ -1838,13 +1842,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (err) goto err_out; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9f061d1adbc..ab65cc51b00 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -533,7 +533,7 @@ void ndisc_send_skb(struct sk_buff *skb, skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -1613,7 +1613,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 97c17fdd6f7..590ddefb7ff 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -61,7 +61,7 @@ static const struct file_operations sockstat6_seq_fops = { static struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ - SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES), + SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS), SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES), @@ -71,7 +71,7 @@ static struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), - SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), + SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS), SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -83,6 +83,12 @@ static struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES), SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS), SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), + SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS), + SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS), + SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), + SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), + SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), + SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 61f6827e590..e99307fba0b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -638,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (err) goto error_fault; - IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) -- cgit v1.2.3-70-g09d2 From 739649c53d7f78f5bf41bdfd1a858ee90c7a687a Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 25 Apr 2009 12:52:40 +0000 Subject: of: add of_parse_phandle() helper for parsing phandle properties of_parse_phandle() is a helper function to read and parse a phandle property and return a pointer to the resulting device_node. Signed-off-by: Grant Likely Acked-by: Andy Fleming Signed-off-by: David S. Miller --- drivers/of/base.c | 24 ++++++++++++++++++++++++ include/linux/of.h | 3 +++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/drivers/of/base.c b/drivers/of/base.c index 41c5dfd8535..ddf224d456b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -494,6 +494,30 @@ int of_modalias_node(struct device_node *node, char *modalias, int len) } EXPORT_SYMBOL_GPL(of_modalias_node); +/** + * of_parse_phandle - Resolve a phandle property to a device_node pointer + * @np: Pointer to device node holding phandle property + * @phandle_name: Name of property holding a phandle value + * @index: For properties holding a table of phandles, this is the index into + * the table + * + * Returns the device_node pointer with refcount incremented. Use + * of_node_put() on it when done. + */ +struct device_node * +of_parse_phandle(struct device_node *np, const char *phandle_name, int index) +{ + const phandle *phandle; + int size; + + phandle = of_get_property(np, phandle_name, &size); + if ((!phandle) || (size < sizeof(*phandle) * (index + 1))) + return NULL; + + return of_find_node_by_phandle(phandle[index]); +} +EXPORT_SYMBOL(of_parse_phandle); + /** * of_parse_phandles_with_args - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list diff --git a/include/linux/of.h b/include/linux/of.h index 6a7efa242f5..7be2d1043c1 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -77,6 +77,9 @@ extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); extern int of_modalias_node(struct device_node *node, char *modalias, int len); +extern struct device_node *of_parse_phandle(struct device_node *np, + const char *phandle_name, + int index); extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); -- cgit v1.2.3-70-g09d2 From 4dea547fef1ba23f9d23f5e7f5218187a7dcf1b3 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 25 Apr 2009 12:52:46 +0000 Subject: phylib: rework to prepare for OF registration of PHYs This patch makes changes in preparation for supporting open firmware device tree descriptions of MDIO busses. Changes include: - Cleanup handling of phy_map[] entries; they are already NULLed when registering and so don't need to be re-cleared, and it is good practice to clear them out when unregistering. - Split phy_device registration out into a new function so that the OF helpers can do two stage registration (separate allocation and registration steps). Signed-off-by: Grant Likely Acked-by: Andy Fleming Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 29 +++------------------------- drivers/net/phy/phy_device.c | 45 ++++++++++++++++++++++++++++++++++++++++---- include/linux/phy.h | 1 + 3 files changed, 45 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index b754020cbe7..bd4e8d72dc0 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -113,7 +113,6 @@ int mdiobus_register(struct mii_bus *bus) bus->reset(bus); for (i = 0; i < PHY_MAX_ADDR; i++) { - bus->phy_map[i] = NULL; if ((bus->phy_mask & (1 << i)) == 0) { struct phy_device *phydev; @@ -150,6 +149,7 @@ void mdiobus_unregister(struct mii_bus *bus) for (i = 0; i < PHY_MAX_ADDR; i++) { if (bus->phy_map[i]) device_unregister(&bus->phy_map[i]->dev); + bus->phy_map[i] = NULL; } } EXPORT_SYMBOL(mdiobus_unregister); @@ -188,35 +188,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) if (IS_ERR(phydev) || phydev == NULL) return phydev; - /* There's a PHY at this address - * We need to set: - * 1) IRQ - * 2) bus_id - * 3) parent - * 4) bus - * 5) mii_bus - * And, we need to register it */ - - phydev->irq = bus->irq != NULL ? bus->irq[addr] : PHY_POLL; - - phydev->dev.parent = bus->parent; - phydev->dev.bus = &mdio_bus_type; - dev_set_name(&phydev->dev, PHY_ID_FMT, bus->id, addr); - - phydev->bus = bus; - - /* Run all of the fixups for this PHY */ - phy_scan_fixups(phydev); - - err = device_register(&phydev->dev); + err = phy_device_register(phydev); if (err) { - printk(KERN_ERR "phy %d failed to register\n", addr); phy_device_free(phydev); - phydev = NULL; + return NULL; } - bus->phy_map[addr] = phydev; - return phydev; } EXPORT_SYMBOL(mdiobus_scan); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0a06e4fd37d..9352ca8fa2c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -39,20 +39,21 @@ MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); -static struct phy_driver genphy_driver; -extern int mdio_bus_init(void); -extern void mdio_bus_exit(void); - void phy_device_free(struct phy_device *phydev) { kfree(phydev); } +EXPORT_SYMBOL(phy_device_free); static void phy_device_release(struct device *dev) { phy_device_free(to_phy_device(dev)); } +static struct phy_driver genphy_driver; +extern int mdio_bus_init(void); +extern void mdio_bus_exit(void); + static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); @@ -166,6 +167,10 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id) dev->addr = addr; dev->phy_id = phy_id; dev->bus = bus; + dev->dev.parent = bus->parent; + dev->dev.bus = &mdio_bus_type; + dev->irq = bus->irq != NULL ? bus->irq[addr] : PHY_POLL; + dev_set_name(&dev->dev, PHY_ID_FMT, bus->id, addr); dev->state = PHY_DOWN; @@ -235,6 +240,38 @@ struct phy_device * get_phy_device(struct mii_bus *bus, int addr) return dev; } +EXPORT_SYMBOL(get_phy_device); + +/** + * phy_device_register - Register the phy device on the MDIO bus + * @phy_device: phy_device structure to be added to the MDIO bus + */ +int phy_device_register(struct phy_device *phydev) +{ + int err; + + /* Don't register a phy if one is already registered at this + * address */ + if (phydev->bus->phy_map[phydev->addr]) + return -EINVAL; + phydev->bus->phy_map[phydev->addr] = phydev; + + /* Run all of the fixups for this PHY */ + phy_scan_fixups(phydev); + + err = device_register(&phydev->dev); + if (err) { + pr_err("phy %d failed to register\n", phydev->addr); + goto out; + } + + return 0; + + out: + phydev->bus->phy_map[phydev->addr] = NULL; + return err; +} +EXPORT_SYMBOL(phy_device_register); /** * phy_prepare_link - prepares the PHY layer to monitor link status diff --git a/include/linux/phy.h b/include/linux/phy.h index 97e40cb6b58..bf0b5f112dc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -444,6 +444,7 @@ static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val) int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); +int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3-70-g09d2 From fa94f6d93c5382810ff41f010f12ca8698fc775e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 25 Apr 2009 12:52:51 +0000 Subject: phylib: add *_direct() variants of phy_connect and phy_attach functions Add phy_connect_direct() and phy_attach_direct() functions so that drivers can use a pointer to the phy_device instead of trying to determine the phy's bus_id string. This patch is useful for OF device tree descriptions of phy devices where the driver doesn't need or know what the bus_id value in order to get a phy_device pointer. Signed-off-by: Grant Likely Acked-by: Andy Fleming Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 118 +++++++++++++++++++++++++++++++------------ include/linux/phy.h | 5 ++ 2 files changed, 90 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9352ca8fa2c..a2ece89622d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -291,6 +291,33 @@ void phy_prepare_link(struct phy_device *phydev, phydev->adjust_link = handler; } +/** + * phy_connect_direct - connect an ethernet device to a specific phy_device + * @dev: the network device to connect + * @phydev: the pointer to the phy device + * @handler: callback function for state change notifications + * @flags: PHY device's dev_flags + * @interface: PHY device's interface + */ +int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, + void (*handler)(struct net_device *), u32 flags, + phy_interface_t interface) +{ + int rc; + + rc = phy_attach_direct(dev, phydev, flags, interface); + if (rc) + return rc; + + phy_prepare_link(phydev, handler); + phy_start_machine(phydev, NULL); + if (phydev->irq > 0) + phy_start_interrupts(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_connect_direct); + /** * phy_connect - connect an ethernet device to a PHY device * @dev: the network device to connect @@ -312,18 +339,21 @@ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, phy_interface_t interface) { struct phy_device *phydev; + struct device *d; + int rc; - phydev = phy_attach(dev, bus_id, flags, interface); - - if (IS_ERR(phydev)) - return phydev; - - phy_prepare_link(phydev, handler); - - phy_start_machine(phydev, NULL); + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id); + if (!d) { + pr_err("PHY %s not found\n", bus_id); + return ERR_PTR(-ENODEV); + } + phydev = to_phy_device(d); - if (phydev->irq > 0) - phy_start_interrupts(phydev); + rc = phy_connect_direct(dev, phydev, handler, flags, interface); + if (rc) + return ERR_PTR(rc); return phydev; } @@ -347,9 +377,9 @@ void phy_disconnect(struct phy_device *phydev) EXPORT_SYMBOL(phy_disconnect); /** - * phy_attach - attach a network device to a particular PHY device + * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach - * @bus_id: PHY device to attach + * @phydev: Pointer to phy_device to attach * @flags: PHY device's dev_flags * @interface: PHY device's interface * @@ -360,22 +390,10 @@ EXPORT_SYMBOL(phy_disconnect); * the attaching device, and given a callback for link status * change. The phy_device is returned to the attaching driver. */ -struct phy_device *phy_attach(struct net_device *dev, - const char *bus_id, u32 flags, phy_interface_t interface) +int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, + u32 flags, phy_interface_t interface) { - struct bus_type *bus = &mdio_bus_type; - struct phy_device *phydev; - struct device *d; - - /* Search the list of PHY devices on the mdio bus for the - * PHY with the requested name */ - d = bus_find_device_by_name(bus, NULL, bus_id); - if (d) { - phydev = to_phy_device(d); - } else { - printk(KERN_ERR "%s not found\n", bus_id); - return ERR_PTR(-ENODEV); - } + struct device *d = &phydev->dev; /* Assume that if there is no driver, that it doesn't * exist, and we should use the genphy driver. */ @@ -388,13 +406,12 @@ struct phy_device *phy_attach(struct net_device *dev, err = device_bind_driver(d); if (err) - return ERR_PTR(err); + return err; } if (phydev->attached_dev) { - printk(KERN_ERR "%s: %s already attached\n", - dev->name, bus_id); - return ERR_PTR(-EBUSY); + dev_err(&dev->dev, "PHY already attached\n"); + return -EBUSY; } phydev->attached_dev = dev; @@ -412,13 +429,48 @@ struct phy_device *phy_attach(struct net_device *dev, err = phy_scan_fixups(phydev); if (err < 0) - return ERR_PTR(err); + return err; err = phydev->drv->config_init(phydev); if (err < 0) - return ERR_PTR(err); + return err; + } + + return 0; +} +EXPORT_SYMBOL(phy_attach_direct); + +/** + * phy_attach - attach a network device to a particular PHY device + * @dev: network device to attach + * @bus_id: Bus ID of PHY device to attach + * @flags: PHY device's dev_flags + * @interface: PHY device's interface + * + * Description: Same as phy_attach_direct() except that a PHY bus_id + * string is passed instead of a pointer to a struct phy_device. + */ +struct phy_device *phy_attach(struct net_device *dev, + const char *bus_id, u32 flags, phy_interface_t interface) +{ + struct bus_type *bus = &mdio_bus_type; + struct phy_device *phydev; + struct device *d; + int rc; + + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device_by_name(bus, NULL, bus_id); + if (!d) { + pr_err("PHY %s not found\n", bus_id); + return ERR_PTR(-ENODEV); } + phydev = to_phy_device(d); + + rc = phy_attach_direct(dev, phydev, flags, interface); + if (rc) + return ERR_PTR(rc); return phydev; } diff --git a/include/linux/phy.h b/include/linux/phy.h index bf0b5f112dc..c216e4e503b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,8 +447,13 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, + u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, const char *bus_id, u32 flags, phy_interface_t interface); +int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, + void (*handler)(struct net_device *), u32 flags, + phy_interface_t interface); struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, void (*handler)(struct net_device *), u32 flags, phy_interface_t interface); -- cgit v1.2.3-70-g09d2 From 8bc487d150b939e69830c39322df4ee486efe381 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 25 Apr 2009 12:52:56 +0000 Subject: openfirmware: Add OF phylib support code Add support for parsing the device tree for PHY devices on an MDIO bus. Currently many of the PowerPC ethernet drivers are open coding a solution for reading data out of the device tree to find the correct PHY device. This patch implements a set of common routines to: a) let MDIO bus drivers register phy_devices described in the tree, and b) let MAC drivers find the correct phy_device via the tree. Signed-off-by: Grant Likely Acked-by: Andy Fleming Signed-off-by: David S. Miller --- drivers/of/Kconfig | 6 +++ drivers/of/Makefile | 1 + drivers/of/of_mdio.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_mdio.h | 22 ++++++++ 4 files changed, 168 insertions(+) create mode 100644 drivers/of/of_mdio.c create mode 100644 include/linux/of_mdio.h (limited to 'include') diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index f821dbc952a..6fe043bd377 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -19,3 +19,9 @@ config OF_SPI depends on OF && PPC_OF && SPI help OpenFirmware SPI accessors + +config OF_MDIO + def_tristate PHYLIB + depends on OF && PHYLIB + help + OpenFirmware MDIO bus (Ethernet PHY) accessors diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 4c3c6f8e36f..bdfb5f5d4b0 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_OF_DEVICE) += device.o platform.o obj-$(CONFIG_OF_GPIO) += gpio.o obj-$(CONFIG_OF_I2C) += of_i2c.o obj-$(CONFIG_OF_SPI) += of_spi.o +obj-$(CONFIG_OF_MDIO) += of_mdio.o diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c new file mode 100644 index 00000000000..aee967d7f76 --- /dev/null +++ b/drivers/of/of_mdio.c @@ -0,0 +1,139 @@ +/* + * OF helpers for the MDIO (Ethernet PHY) API + * + * Copyright (c) 2009 Secret Lab Technologies, Ltd. + * + * This file is released under the GPLv2 + * + * This file provides helper functions for extracting PHY device information + * out of the OpenFirmware device tree and using it to populate an mii_bus. + */ + +#include +#include +#include +#include + +MODULE_AUTHOR("Grant Likely "); +MODULE_LICENSE("GPL"); + +/** + * of_mdiobus_register - Register mii_bus and create PHYs from the device tree + * @mdio: pointer to mii_bus structure + * @np: pointer to device_node of MDIO bus. + * + * This function registers the mii_bus structure and registers a phy_device + * for each child node of @np. + */ +int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) +{ + struct phy_device *phy; + struct device_node *child; + int rc, i; + + /* Mask out all PHYs from auto probing. Instead the PHYs listed in + * the device tree are populated after the bus has been registered */ + mdio->phy_mask = ~0; + + /* Clear all the IRQ properties */ + if (mdio->irq) + for (i=0; iirq[i] = PHY_POLL; + + /* Register the MDIO bus */ + rc = mdiobus_register(mdio); + if (rc) + return rc; + + /* Loop over the child nodes and register a phy_device for each one */ + for_each_child_of_node(np, child) { + const u32 *addr; + int len; + + /* A PHY must have a reg property in the range [0-31] */ + addr = of_get_property(child, "reg", &len); + if (!addr || len < sizeof(*addr) || *addr >= 32 || *addr < 0) { + dev_err(&mdio->dev, "%s has invalid PHY address\n", + child->full_name); + continue; + } + + if (mdio->irq) { + mdio->irq[*addr] = irq_of_parse_and_map(child, 0); + if (!mdio->irq[*addr]) + mdio->irq[*addr] = PHY_POLL; + } + + phy = get_phy_device(mdio, *addr); + if (!phy) { + dev_err(&mdio->dev, "error probing PHY at address %i\n", + *addr); + continue; + } + phy_scan_fixups(phy); + + /* Associate the OF node with the device structure so it + * can be looked up later */ + of_node_get(child); + dev_archdata_set_node(&phy->dev.archdata, child); + + /* All data is now stored in the phy struct; register it */ + rc = phy_device_register(phy); + if (rc) { + phy_device_free(phy); + of_node_put(child); + continue; + } + + dev_dbg(&mdio->dev, "registered phy %s at address %i\n", + child->name, *addr); + } + + return 0; +} +EXPORT_SYMBOL(of_mdiobus_register); + +/** + * of_phy_find_device - Give a PHY node, find the phy_device + * @phy_np: Pointer to the phy's device tree node + * + * Returns a pointer to the phy_device. + */ +struct phy_device *of_phy_find_device(struct device_node *phy_np) +{ + struct device *d; + int match(struct device *dev, void *phy_np) + { + return dev_archdata_get_node(&dev->archdata) == phy_np; + } + + if (!phy_np) + return NULL; + + d = bus_find_device(&mdio_bus_type, NULL, phy_np, match); + return d ? to_phy_device(d) : NULL; +} +EXPORT_SYMBOL(of_phy_find_device); + +/** + * of_phy_connect - Connect to the phy described in the device tree + * @dev: pointer to net_device claiming the phy + * @phy_np: Pointer to device tree node for the PHY + * @hndlr: Link state callback for the network device + * @iface: PHY data interface type + * + * Returns a pointer to the phy_device if successfull. NULL otherwise + */ +struct phy_device *of_phy_connect(struct net_device *dev, + struct device_node *phy_np, + void (*hndlr)(struct net_device *), u32 flags, + phy_interface_t iface) +{ + struct phy_device *phy = of_phy_find_device(phy_np); + + if (!phy) + return NULL; + + return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy; +} +EXPORT_SYMBOL(of_phy_connect); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h new file mode 100644 index 00000000000..c9663c69030 --- /dev/null +++ b/include/linux/of_mdio.h @@ -0,0 +1,22 @@ +/* + * OF helpers for the MDIO (Ethernet PHY) API + * + * Copyright (c) 2009 Secret Lab Technologies, Ltd. + * + * This file is released under the GPLv2 + */ + +#ifndef __LINUX_OF_MDIO_H +#define __LINUX_OF_MDIO_H + +#include +#include + +extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); +extern struct phy_device *of_phy_find_device(struct device_node *phy_np); +extern struct phy_device *of_phy_connect(struct net_device *dev, + struct device_node *phy_np, + void (*hndlr)(struct net_device *), + u32 flags, phy_interface_t iface); + +#endif /* __LINUX_OF_MDIO_H */ -- cgit v1.2.3-70-g09d2 From aa73832c5a80d6c52c69b18af858d88fa595dd3c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 25 Apr 2009 12:53:33 +0000 Subject: net: Rework fs_enet driver to use of_mdio infrastructure This patch simplifies the driver by making use of more common code. Signed-off-by: Grant Likely Acked-by: Andy Fleming Signed-off-by: David S. Miller --- drivers/net/fs_enet/fs_enet-main.c | 69 ++++++-------------------------------- drivers/net/fs_enet/mii-bitbang.c | 29 ++-------------- drivers/net/fs_enet/mii-fec.c | 26 +------------- include/linux/fs_enet_pd.h | 6 ++-- 4 files changed, 16 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index a9cbc3191a2..9604aaed61d 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -752,9 +754,10 @@ static int fs_init_phy(struct net_device *dev) fep->oldlink = 0; fep->oldspeed = 0; fep->oldduplex = -1; - if(fep->fpi->bus_id) - phydev = phy_connect(dev, fep->fpi->bus_id, &fs_adjust_link, 0, - PHY_INTERFACE_MODE_MII); + if(fep->fpi->phy_node) + phydev = of_phy_connect(dev, fep->fpi->phy_node, + &fs_adjust_link, 0, + PHY_INTERFACE_MODE_MII); else { printk("No phy bus ID specified in BSP code\n"); return -EINVAL; @@ -962,57 +965,6 @@ static void cleanup_immap(void) /**************************************************************************************/ -static int __devinit find_phy(struct device_node *np, - struct fs_platform_info *fpi) -{ - struct device_node *phynode, *mdionode; - int ret = 0, len, bus_id; - const u32 *data; - - data = of_get_property(np, "fixed-link", NULL); - if (data) { - snprintf(fpi->bus_id, 16, "%x:%02x", 0, *data); - return 0; - } - - data = of_get_property(np, "phy-handle", &len); - if (!data || len != 4) - return -EINVAL; - - phynode = of_find_node_by_phandle(*data); - if (!phynode) - return -EINVAL; - - data = of_get_property(phynode, "reg", &len); - if (!data || len != 4) { - ret = -EINVAL; - goto out_put_phy; - } - - mdionode = of_get_parent(phynode); - if (!mdionode) { - ret = -EINVAL; - goto out_put_phy; - } - - bus_id = of_get_gpio(mdionode, 0); - if (bus_id < 0) { - struct resource res; - ret = of_address_to_resource(mdionode, 0, &res); - if (ret) - goto out_put_mdio; - bus_id = res.start; - } - - snprintf(fpi->bus_id, 16, "%x:%02x", bus_id, *data); - -out_put_mdio: - of_node_put(mdionode); -out_put_phy: - of_node_put(phynode); - return ret; -} - #ifdef CONFIG_FS_ENET_HAS_FEC #define IS_FEC(match) ((match)->data == &fs_fec_ops) #else @@ -1062,9 +1014,9 @@ static int __devinit fs_enet_probe(struct of_device *ofdev, fpi->rx_copybreak = 240; fpi->use_napi = 1; fpi->napi_weight = 17; - - ret = find_phy(ofdev->node, fpi); - if (ret) + fpi->phy_node = of_parse_phandle(ofdev->node, "phy-handle", 0); + if ((!fpi->phy_node) && (!of_get_property(ofdev->node, "fixed-link", + NULL))) goto out_free_fpi; privsize = sizeof(*fep) + @@ -1136,6 +1088,7 @@ out_cleanup_data: out_free_dev: free_netdev(ndev); dev_set_drvdata(&ofdev->dev, NULL); + of_node_put(fpi->phy_node); out_free_fpi: kfree(fpi); return ret; @@ -1151,7 +1104,7 @@ static int fs_enet_remove(struct of_device *ofdev) fep->ops->free_bd(ndev); fep->ops->cleanup_data(ndev); dev_set_drvdata(fep->dev, NULL); - + of_node_put(fep->fpi->phy_node); free_netdev(ndev); return 0; } diff --git a/drivers/net/fs_enet/mii-bitbang.c b/drivers/net/fs_enet/mii-bitbang.c index 49b6645d7e0..93b481b0e3c 100644 --- a/drivers/net/fs_enet/mii-bitbang.c +++ b/drivers/net/fs_enet/mii-bitbang.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "fs_enet.h" @@ -149,31 +150,12 @@ static int __devinit fs_mii_bitbang_init(struct mii_bus *bus, return 0; } -static void __devinit add_phy(struct mii_bus *bus, struct device_node *np) -{ - const u32 *data; - int len, id, irq; - - data = of_get_property(np, "reg", &len); - if (!data || len != 4) - return; - - id = *data; - bus->phy_mask &= ~(1 << id); - - irq = of_irq_to_resource(np, 0, NULL); - if (irq != NO_IRQ) - bus->irq[id] = irq; -} - static int __devinit fs_enet_mdio_probe(struct of_device *ofdev, const struct of_device_id *match) { - struct device_node *np = NULL; struct mii_bus *new_bus; struct bb_info *bitbang; int ret = -ENOMEM; - int i; bitbang = kzalloc(sizeof(struct bb_info), GFP_KERNEL); if (!bitbang) @@ -196,17 +178,10 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev, if (!new_bus->irq) goto out_unmap_regs; - for (i = 0; i < PHY_MAX_ADDR; i++) - new_bus->irq[i] = -1; - - while ((np = of_get_next_child(ofdev->node, np))) - if (!strcmp(np->type, "ethernet-phy")) - add_phy(new_bus, np); - new_bus->parent = &ofdev->dev; dev_set_drvdata(&ofdev->dev, new_bus); - ret = mdiobus_register(new_bus); + ret = of_mdiobus_register(new_bus, ofdev->node); if (ret) goto out_free_irqs; diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index 61aaae444b4..75a09994d66 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -100,23 +100,6 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus) return 0; } -static void __devinit add_phy(struct mii_bus *bus, struct device_node *np) -{ - const u32 *data; - int len, id, irq; - - data = of_get_property(np, "reg", &len); - if (!data || len != 4) - return; - - id = *data; - bus->phy_mask &= ~(1 << id); - - irq = of_irq_to_resource(np, 0, NULL); - if (irq != NO_IRQ) - bus->irq[id] = irq; -} - static int __devinit fs_enet_mdio_probe(struct of_device *ofdev, const struct of_device_id *match) { @@ -163,17 +146,10 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev, if (!new_bus->irq) goto out_unmap_regs; - for (i = 0; i < PHY_MAX_ADDR; i++) - new_bus->irq[i] = -1; - - while ((np = of_get_next_child(ofdev->node, np))) - if (!strcmp(np->type, "ethernet-phy")) - add_phy(new_bus, np); - new_bus->parent = &ofdev->dev; dev_set_drvdata(&ofdev->dev, new_bus); - ret = mdiobus_register(new_bus); + ret = of_mdiobus_register(new_bus, ofdev->node); if (ret) goto out_free_irqs; diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h index 8300cab30f9..51b793466ff 100644 --- a/include/linux/fs_enet_pd.h +++ b/include/linux/fs_enet_pd.h @@ -17,6 +17,7 @@ #define FS_ENET_PD_H #include +#include #include #define FS_ENET_NAME "fs_enet" @@ -130,10 +131,7 @@ struct fs_platform_info { u32 device_flags; - int phy_addr; /* the phy address (-1 no phy) */ - char bus_id[16]; - int phy_irq; /* the phy irq (if it exists) */ - + struct device_node *phy_node; const struct fs_mii_bus_info *bus_info; int rx_ring, tx_ring; /* number of buffers on rx */ -- cgit v1.2.3-70-g09d2 From f85ba78068ac137fe9c1f50d25405d2783d75c77 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 27 Apr 2009 03:23:54 -0700 Subject: tun: add IFF_TUN_EXCL flag to avoid opening a persistent device. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating a certain types of VPN, NetworkManager will first attempt to find an available tun device by iterating through 'vpn%d' until it finds one that isn't already busy. Then it'll set that to be persistent and owned by the otherwise unprivileged user that the VPN dæmon itself runs as. There's a race condition here -- during the period where the vpn%d device is created and we're waiting for the VPN dæmon to actually connect and use it, if we try to create _another_ device we could end up re-using the same one -- because trying to open it again doesn't get -EBUSY as it would while it's _actually_ busy. So solve this, we add an IFF_TUN_EXCL flag which causes tun_set_iff() to fail if it would be opening an existing persistent tundevice -- so that we can make sure we're getting an entirely _new_ device. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ include/linux/if_tun.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 589f0ca668d..94622e5fb93 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -874,6 +874,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { + if (ifr->ifr_flags & IFF_TUN_EXCL) + return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) tun = netdev_priv(dev); else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 049d6c9428d..915ba5789f0 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -55,6 +55,7 @@ #define IFF_NO_PI 0x1000 #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 +#define IFF_TUN_EXCL 0x8000 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ -- cgit v1.2.3-70-g09d2 From edbd9e30306067c3a45c035eb95a6f49daaa2337 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 27 Apr 2009 05:44:29 -0700 Subject: gro: Fix handling of headers that extend over the tail The skb_gro_* code fails to handle the case where a header starts in the linear area but ends in the frags area. Since the goal of skb_gro_* is to optimise the case of completely non-linear packets, we can simply bail out if we have anything in the linear area. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 31167451d08..c9ef4191607 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1137,7 +1137,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb) static inline void *skb_gro_mac_header(struct sk_buff *skb) { - return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : + return skb_headlen(skb) ? skb_mac_header(skb) : page_address(skb_shinfo(skb)->frags[0].page) + skb_shinfo(skb)->frags[0].page_offset; } diff --git a/net/core/dev.c b/net/core/dev.c index e48c08af76a..6785b067ad5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2378,18 +2378,13 @@ void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) unsigned int offset = skb_gro_offset(skb); hlen += offset; - if (hlen <= skb_headlen(skb)) - return skb->data + offset; - - if (unlikely(!skb_shinfo(skb)->nr_frags || - skb_shinfo(skb)->frags[0].size <= - hlen - skb_headlen(skb) || + if (unlikely(skb_headlen(skb) || + skb_shinfo(skb)->frags[0].size < hlen || PageHighMem(skb_shinfo(skb)->frags[0].page))) return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; return page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset + - offset - skb_headlen(skb); + skb_shinfo(skb)->frags[0].page_offset + offset; } EXPORT_SYMBOL(skb_gro_header); -- cgit v1.2.3-70-g09d2 From 36e7b1b8dac1a785abca3a121b6b0b79f1a8d7df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 27 Apr 2009 05:44:45 -0700 Subject: gro: Fix COMPLETE checksum handling On a brand new GRO skb, we cannot call ip_hdr since the header may lie in the non-linear area. This patch adds the helper skb_gro_network_header to handle this. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c9ef4191607..fe20d171acf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1142,6 +1142,13 @@ static inline void *skb_gro_mac_header(struct sk_buff *skb) skb_shinfo(skb)->frags[0].page_offset; } +static inline void *skb_gro_network_header(struct sk_buff *skb) +{ + return skb_headlen(skb) ? skb_network_header(skb) : + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + skb_network_offset(skb); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5d427f86b41..bda74e8aed7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2343,7 +2343,7 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = skb_gro_network_header(skb); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4b5aa185426..d9dd94b6bf6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -943,7 +943,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = skb_gro_network_header(skb); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: -- cgit v1.2.3-70-g09d2 From 879c5e6b7cb4c689d08ca9b2e353d8ab3dc425d5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 17 Jun 2009 11:47:48 -0400 Subject: jbd2: convert instrumentation from markers to tracepoints Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 5 +- fs/jbd2/commit.c | 13 ++-- fs/jbd2/journal.c | 69 ++++++++++++++++++ include/linux/jbd2.h | 6 ++ include/trace/events/jbd2.h | 168 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 252 insertions(+), 9 deletions(-) create mode 100644 include/trace/events/jbd2.h (limited to 'include') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 17159cacbd9..5d70b3e6d49 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -20,9 +20,9 @@ #include #include #include -#include #include #include +#include /* * Unlink a buffer from a transaction checkpoint list. @@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); - trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", - journal->j_devname, result); + trace_jbd2_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0b7d3b8226f..7b4088b2364 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal, * block allocation with delalloc. We need to write * only allocated blocks here. */ + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); err = journal_submit_inode_data_buffers(mapping); if (!ret) ret = err; @@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); - trace_mark(jbd2_start_commit, "dev %s transaction %d", - journal->j_devname, commit_transaction->t_tid); + trace_jbd2_start_commit(journal, commit_transaction); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ if (commit_transaction->t_synchronous_commit) write_op = WRITE_SYNC_PLUG; + trace_jbd2_commit_locking(journal, commit_transaction); stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_locked = jiffies; stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, @@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + trace_jbd2_commit_flushing(journal, commit_transaction); stats.u.run.rs_flushing = jiffies; stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, stats.u.run.rs_flushing); @@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_COMMIT; spin_unlock(&journal->j_state_lock); + trace_jbd2_commit_logging(journal, commit_transaction); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, stats.u.run.rs_logging); @@ -1054,9 +1057,7 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, - journal->j_tail_sequence); + trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); if (to_free) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 62be7d294ec..18bfd5dab64 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -38,6 +38,10 @@ #include #include #include +#include + +#define CREATE_TRACE_POINTS +#include #include #include @@ -2377,6 +2381,71 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + * + * The caller should use rcu_read_lock() in order to make sure the + * device name stays valid until its done with it. We use + * rcu_read_lock() as well to make sure we're safe in case the caller + * gets sloppy, and because rcu_read_lock() is cheap and can be safely + * nested. + */ +struct devname_cache { + struct rcu_head rcu; + dev_t device; + char devname[BDEVNAME_SIZE]; +}; +#define CACHE_SIZE_BITS 6 +static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; +static DEFINE_SPINLOCK(devname_cache_lock); + +static void free_devcache(struct rcu_head *rcu) +{ + kfree(rcu); +} + +const char *jbd2_dev_to_name(dev_t device) +{ + int i = hash_32(device, CACHE_SIZE_BITS); + char *ret; + struct block_device *bd; + + rcu_read_lock(); + if (devcache[i] && devcache[i]->device == device) { + ret = devcache[i]->devname; + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + spin_lock(&devname_cache_lock); + if (devcache[i]) { + if (devcache[i]->device == device) { + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; + } + call_rcu(&devcache[i]->rcu, free_devcache); + } + devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); + if (!devcache[i]) { + spin_unlock(&devname_cache_lock); + return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + } + devcache[i]->device = device; + bd = bdget(device); + if (bd) { + bdevname(bd, devcache[i]->devname); + bdput(bd); + } else + __bdevname(device, devcache[i]->devname); + ret = devcache[i]->devname; + spin_unlock(&devname_cache_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_dev_to_name); + MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index cc02393bfce..d97eb652d6c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode); #define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0) +/* + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 + * tracing infrastructure to map a dev_t to a device name. + */ +extern const char *jbd2_dev_to_name(dev_t device); + #endif /* __KERNEL__ */ #endif /* _LINUX_JBD2_H */ diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h new file mode 100644 index 00000000000..845b0b4b48f --- /dev/null +++ b/include/trace/events/jbd2.h @@ -0,0 +1,168 @@ +#if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_JBD2_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM jbd2 + +TRACE_EVENT(jbd2_checkpoint, + + TP_PROTO(journal_t *journal, int result), + + TP_ARGS(journal, result), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, result ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->result = result; + ), + + TP_printk("dev %s result %d", + jbd2_dev_to_name(__entry->dev), __entry->result) +); + +TRACE_EVENT(jbd2_start_commit, + + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + + TP_ARGS(journal, commit_transaction), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) + __field( int, transaction ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->sync_commit = commit_transaction->t_synchronous_commit; + __entry->transaction = commit_transaction->t_tid; + ), + + TP_printk("dev %s transaction %d sync %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit) +); + +TRACE_EVENT(jbd2_commit_locking, + + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + + TP_ARGS(journal, commit_transaction), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) + __field( int, transaction ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->sync_commit = commit_transaction->t_synchronous_commit; + __entry->transaction = commit_transaction->t_tid; + ), + + TP_printk("dev %s transaction %d sync %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit) +); + +TRACE_EVENT(jbd2_commit_flushing, + + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + + TP_ARGS(journal, commit_transaction), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) + __field( int, transaction ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->sync_commit = commit_transaction->t_synchronous_commit; + __entry->transaction = commit_transaction->t_tid; + ), + + TP_printk("dev %s transaction %d sync %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit) +); + +TRACE_EVENT(jbd2_commit_logging, + + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + + TP_ARGS(journal, commit_transaction), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) + __field( int, transaction ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->sync_commit = commit_transaction->t_synchronous_commit; + __entry->transaction = commit_transaction->t_tid; + ), + + TP_printk("dev %s transaction %d sync %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit) +); + +TRACE_EVENT(jbd2_end_commit, + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + + TP_ARGS(journal, commit_transaction), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) + __field( int, transaction ) + __field( int, head ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->sync_commit = commit_transaction->t_synchronous_commit; + __entry->transaction = commit_transaction->t_tid; + __entry->head = journal->j_tail_sequence; + ), + + TP_printk("dev %s transaction %d sync %d head %d", + jbd2_dev_to_name(__entry->dev), __entry->transaction, + __entry->sync_commit, __entry->head) +); + +TRACE_EVENT(jbd2_submit_inode_data, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + ), + + TP_printk("dev %s ino %lu", + jbd2_dev_to_name(__entry->dev), __entry->ino) +); + +#endif /* _TRACE_JBD2_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-70-g09d2 From 9bffad1ed2a003a355ed1b42424a0ae3575275ed Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 17 Jun 2009 11:48:11 -0400 Subject: ext4: convert instrumentation from markers to tracepoints Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 8 +- fs/ext4/ialloc.c | 15 +- fs/ext4/inode.c | 69 +---- fs/ext4/mballoc.c | 77 ++--- fs/ext4/mballoc.h | 1 - fs/ext4/super.c | 6 +- include/trace/events/ext4.h | 719 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 774 insertions(+), 121 deletions(-) create mode 100644 include/trace/events/ext4.h (limited to 'include') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 5afe4370840..83cf6415f59 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -28,10 +28,12 @@ #include #include #include -#include + #include "ext4.h" #include "ext4_jbd2.h" +#include + /* * akpm: A new design for ext4_sync_file(). * @@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); - trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", - inode->i_sb->s_id, datasync, inode->i_ino, - dentry->d_parent->d_inode->i_ino); + trace_ext4_sync_file(file, dentry, datasync); /* * data=writeback: diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3743bd849bc..7d502f3be91 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -23,11 +23,14 @@ #include #include #include + #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include + /* * ialloc.c contains the inodes allocation and deallocation routines */ @@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ino = inode->i_ino; ext4_debug("freeing inode %lu\n", ino); - trace_mark(ext4_free_inode, - "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu", - sb->s_id, inode->i_ino, inode->i_mode, - (unsigned long) inode->i_uid, (unsigned long) inode->i_gid, - (unsigned long long) inode->i_blocks); + trace_ext4_free_inode(inode); /* * Note: we must free any quota before locking the superblock, @@ -815,8 +814,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) sb = dir->i_sb; ngroups = ext4_get_groups_count(sb); - trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, - dir->i_ino, mode); + trace_ext4_request_inode(dir, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1047,8 +1045,7 @@ got: } ext4_debug("allocating inode %lu\n", inode->i_ino); - trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", - sb->s_id, inode->i_ino, dir->i_ino, mode); + trace_ext4_allocate_inode(inode, dir, mode); goto really_out; fail: ext4_std_error(sb, err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 875db944b22..2418ad36eab 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,11 +37,14 @@ #include #include #include + #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "ext4_extents.h" +#include + #define MPAGE_DA_EXTENT_TAIL 0x01 static inline int ext4_begin_ordered_truncate(struct inode *inode, @@ -1466,10 +1469,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; - trace_mark(ext4_write_begin, - "dev %s ino %lu pos %llu len %u flags %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, flags); + trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case * we allocate blocks but write fails for some reason @@ -1611,10 +1611,7 @@ static int ext4_ordered_write_end(struct file *file, struct inode *inode = mapping->host; int ret = 0, ret2; - trace_mark(ext4_ordered_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_ordered_write_end(inode, pos, len, copied); ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { @@ -1658,10 +1655,7 @@ static int ext4_writeback_write_end(struct file *file, struct inode *inode = mapping->host; int ret = 0, ret2; - trace_mark(ext4_writeback_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_writeback_write_end(inode, pos, len, copied); ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; @@ -1705,10 +1699,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; loff_t new_i_size; - trace_mark(ext4_journalled_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -2554,9 +2545,7 @@ static int ext4_da_writepage(struct page *page, struct buffer_head *page_bufs; struct inode *inode = page->mapping->host; - trace_mark(ext4_da_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_da_writepage(inode, page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2667,19 +2656,7 @@ static int ext4_da_writepages(struct address_space *mapping, int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); - trace_mark(ext4_da_writepages, - "dev %s ino %lu nr_t_write %ld " - "pages_skipped %ld range_start %llu " - "range_end %llu nonblocking %d " - "for_kupdate %d for_reclaim %d " - "for_writepages %d range_cyclic %d", - inode->i_sb->s_id, inode->i_ino, - wbc->nr_to_write, wbc->pages_skipped, - (unsigned long long) wbc->range_start, - (unsigned long long) wbc->range_end, - wbc->nonblocking, wbc->for_kupdate, - wbc->for_reclaim, wbc->for_writepages, - wbc->range_cyclic); + trace_ext4_da_writepages(inode, wbc); /* * No pages to write? This is mainly a kludge to avoid starting @@ -2845,14 +2822,7 @@ out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; wbc->nr_to_write -= nr_to_writebump; - trace_mark(ext4_da_writepage_result, - "dev %s ino %lu ret %d pages_written %d " - "pages_skipped %ld congestion %d " - "more_io %d no_nrwrite_index_update %d", - inode->i_sb->s_id, inode->i_ino, ret, - pages_written, wbc->pages_skipped, - wbc->encountered_congestion, wbc->more_io, - wbc->no_nrwrite_index_update); + trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); return ret; } @@ -2904,11 +2874,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; - - trace_mark(ext4_da_write_begin, - "dev %s ino %lu pos %llu len %u flags %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, flags); + trace_ext4_da_write_begin(inode, pos, len, flags); retry: /* * With delayed allocation, we don't log the i_disksize update @@ -3001,10 +2967,7 @@ static int ext4_da_write_end(struct file *file, } } - trace_mark(ext4_da_write_end, - "dev %s ino %lu pos %llu len %u copied %u", - inode->i_sb->s_id, inode->i_ino, - (unsigned long long) pos, len, copied); + trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_CACHE_SIZE - 1); end = start + copied - 1; @@ -3255,9 +3218,7 @@ static int ext4_normal_writepage(struct page *page, loff_t size = i_size_read(inode); loff_t len; - trace_mark(ext4_normal_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_normal_writepage(inode, page); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -3343,9 +3304,7 @@ static int ext4_journalled_writepage(struct page *page, loff_t size = i_size_read(inode); loff_t len; - trace_mark(ext4_journalled_writepage, - "dev %s ino %lu page_index %lu", - inode->i_sb->s_id, inode->i_ino, page->index); + trace_ext4_journalled_writepage(inode, page); J_ASSERT(PageLocked(page)); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ed8482e22c0..8d98070b48f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -22,6 +22,8 @@ */ #include "mballoc.h" +#include + /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); - - static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { #if BITS_PER_LONG == 64 @@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) + entry->start_blk + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", - sb->s_id, (unsigned long long) discard_block, - entry->count); + trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, + entry->count); sb_issue_discard(sb, discard_block, entry->count); kmem_cache_free(ext4_free_ext_cachep, entry); @@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) mb_debug("new inode pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); - trace_mark(ext4_mb_new_inode_pa, - "dev %s ino %lu pstart %llu len %u lstart %u", - sb->s_id, ac->ac_inode->i_ino, - pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_ext4_mb_new_inode_pa(ac, pa); ext4_mb_use_inode_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_type = MB_GROUP_PA; mb_debug("new group pa %p: %llu/%u for %u\n", pa, - pa->pa_pstart, pa->pa_len, pa->pa_lstart); - trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", - sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart); + pa->pa_pstart, pa->pa_len, pa->pa_lstart); + trace_ext4_mb_new_group_pa(ac, pa); ext4_mb_use_group_pa(ac, pa); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); @@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ext4_mb_store_history(ac); } - trace_mark(ext4_mb_release_inode_pa, - "dev %s ino %lu block %llu count %u", - sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit, - next - bit); + trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, + next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; - trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", - sb->s_id, pa->pa_pstart, pa->pa_len); + trace_ext4_mb_release_group_pa(ac, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); @@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, INIT_LIST_HEAD(&list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) + ac->ac_sb = sb; repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode) } mb_debug("discard preallocation for inode %lu\n", inode->i_ino); - trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, - inode->i_ino); + trace_ext4_discard_preallocations(inode); INIT_LIST_HEAD(&list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = inode; + } repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, INIT_LIST_HEAD(&discard_list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) + ac->ac_sb = sb; spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], @@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) int ret; int freed = 0; - trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", - sb->s_id, needed); + trace_ext4_mb_discard_preallocations(sb, needed); for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; @@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, sb = ar->inode->i_sb; sbi = EXT4_SB(sb); - trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " - "lblk %llu goal %llu lleft %llu lright %llu " - "pleft %llu pright %llu ", - sb->s_id, ar->flags, ar->len, - ar->inode ? ar->inode->i_ino : 0, - (unsigned long long) ar->logical, - (unsigned long long) ar->goal, - (unsigned long long) ar->lleft, - (unsigned long long) ar->lright, - (unsigned long long) ar->pleft, - (unsigned long long) ar->pright); + trace_ext4_request_blocks(ar); /* * For delayed allocation, we could skip the ENOSPC and @@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (!ac) { + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = ar->inode; + } else { ar->len = 0; *errp = -ENOMEM; goto out1; @@ -4594,18 +4585,7 @@ out3: reserv_blks); } - trace_mark(ext4_allocate_blocks, - "dev %s block %llu flags %u len %u ino %lu " - "logical %llu goal %llu lleft %llu lright %llu " - "pleft %llu pright %llu ", - sb->s_id, (unsigned long long) block, - ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0, - (unsigned long long) ar->logical, - (unsigned long long) ar->goal, - (unsigned long long) ar->lleft, - (unsigned long long) ar->lright, - (unsigned long long) ar->pleft, - (unsigned long long) ar->pright); + trace_ext4_allocate_blocks(ar, (unsigned long long)block); return block; } @@ -4740,10 +4720,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, } ext4_debug("freeing block %lu\n", block); - trace_mark(ext4_free_blocks, - "dev %s block %llu count %lu metadata %d ino %lu", - sb->s_id, (unsigned long long) block, count, metadata, - inode ? inode->i_ino : 0); + trace_ext4_free_blocks(inode, block, count, metadata); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 75e34f69215..c96bb19f58f 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "ext4_jbd2.h" #include "ext4.h" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 012c4251397..e8f0b2af460 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,9 @@ #include "xattr.h" #include "acl.h" +#define CREATE_TRACE_POINTS +#include + static int default_mb_history_length = 1000; module_param_named(default_mb_history_length, default_mb_history_length, @@ -3346,7 +3348,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) int ret = 0; tid_t target; - trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); + trace_ext4_sync_fs(sb, wait); if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (wait) jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h new file mode 100644 index 00000000000..acf4cc9cd36 --- /dev/null +++ b/include/trace/events/ext4.h @@ -0,0 +1,719 @@ +#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EXT4_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ext4 + +#include +#include "../../../fs/ext4/ext4.h" +#include "../../../fs/ext4/mballoc.h" +#include + +TRACE_EVENT(ext4_free_inode, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( umode_t, mode ) + __field( uid_t, uid ) + __field( gid_t, gid ) + __field( blkcnt_t, blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->uid = inode->i_uid; + __entry->gid = inode->i_gid; + __entry->blocks = inode->i_blocks; + ), + + TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, + __entry->uid, __entry->gid, __entry->blocks) +); + +TRACE_EVENT(ext4_request_inode, + TP_PROTO(struct inode *dir, int mode), + + TP_ARGS(dir, mode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, dir ) + __field( umode_t, mode ) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = dir->i_ino; + __entry->mode = mode; + ), + + TP_printk("dev %s dir %lu mode %d", + jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode) +); + +TRACE_EVENT(ext4_allocate_inode, + TP_PROTO(struct inode *inode, struct inode *dir, int mode), + + TP_ARGS(inode, dir, mode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ino_t, dir ) + __field( umode_t, mode ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->dir = dir->i_ino; + __entry->mode = mode; + ), + + TP_printk("dev %s ino %lu dir %lu mode %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode) +); + +TRACE_EVENT(ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->flags = flags; + ), + + TP_printk("dev %s ino %lu pos %llu len %u flags %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->flags) +); + +TRACE_EVENT(ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, copied ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev %s ino %lu pos %llu len %u copied %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->copied) +); + +TRACE_EVENT(ext4_writeback_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, copied ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev %s ino %lu pos %llu len %u copied %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->copied) +); + +TRACE_EVENT(ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, copied ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev %s ino %lu pos %llu len %u copied %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->copied) +); + +TRACE_EVENT(ext4_da_writepage, + TP_PROTO(struct inode *inode, struct page *page), + + TP_ARGS(inode, page), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( pgoff_t, index ) + + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->index = page->index; + ), + + TP_printk("dev %s ino %lu page_index %lu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) +); + +TRACE_EVENT(ext4_da_writepages, + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( long, nr_to_write ) + __field( long, pages_skipped ) + __field( loff_t, range_start ) + __field( loff_t, range_end ) + __field( char, nonblocking ) + __field( char, for_kupdate ) + __field( char, for_reclaim ) + __field( char, for_writepages ) + __field( char, range_cyclic ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->nonblocking = wbc->nonblocking; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_reclaim = wbc->for_reclaim; + __entry->for_writepages = wbc->for_writepages; + __entry->range_cyclic = wbc->range_cyclic; + ), + + TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, + __entry->pages_skipped, __entry->range_start, + __entry->range_end, __entry->nonblocking, + __entry->for_kupdate, __entry->for_reclaim, + __entry->for_writepages, __entry->range_cyclic) +); + +TRACE_EVENT(ext4_da_writepages_result, + TP_PROTO(struct inode *inode, struct writeback_control *wbc, + int ret, int pages_written), + + TP_ARGS(inode, wbc, ret, pages_written), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( int, ret ) + __field( int, pages_written ) + __field( long, pages_skipped ) + __field( char, encountered_congestion ) + __field( char, more_io ) + __field( char, no_nrwrite_index_update ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ret = ret; + __entry->pages_written = pages_written; + __entry->pages_skipped = wbc->pages_skipped; + __entry->encountered_congestion = wbc->encountered_congestion; + __entry->more_io = wbc->more_io; + __entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update; + ), + + TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret, + __entry->pages_written, __entry->pages_skipped, + __entry->encountered_congestion, __entry->more_io, + __entry->no_nrwrite_index_update) +); + +TRACE_EVENT(ext4_da_write_begin, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->flags = flags; + ), + + TP_printk("dev %s ino %lu pos %llu len %u flags %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->flags) +); + +TRACE_EVENT(ext4_da_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, pos ) + __field( unsigned int, len ) + __field( unsigned int, copied ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev %s ino %lu pos %llu len %u copied %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, + __entry->copied) +); + +TRACE_EVENT(ext4_normal_writepage, + TP_PROTO(struct inode *inode, struct page *page), + + TP_ARGS(inode, page), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( pgoff_t, index ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->index = page->index; + ), + + TP_printk("dev %s ino %lu page_index %lu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) +); + +TRACE_EVENT(ext4_journalled_writepage, + TP_PROTO(struct inode *inode, struct page *page), + + TP_ARGS(inode, page), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( pgoff_t, index ) + + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->index = page->index; + ), + + TP_printk("dev %s ino %lu page_index %lu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) +); + +TRACE_EVENT(ext4_discard_blocks, + TP_PROTO(struct super_block *sb, unsigned long long blk, + unsigned long long count), + + TP_ARGS(sb, blk, count), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( __u64, blk ) + __field( __u64, count ) + + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->blk = blk; + __entry->count = count; + ), + + TP_printk("dev %s blk %llu count %llu", + jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count) +); + +TRACE_EVENT(ext4_mb_new_inode_pa, + TP_PROTO(struct ext4_allocation_context *ac, + struct ext4_prealloc_space *pa), + + TP_ARGS(ac, pa), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, pa_pstart ) + __field( __u32, pa_len ) + __field( __u64, pa_lstart ) + + ), + + TP_fast_assign( + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->pa_pstart = pa->pa_pstart; + __entry->pa_len = pa->pa_len; + __entry->pa_lstart = pa->pa_lstart; + ), + + TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, + __entry->pa_len, __entry->pa_lstart) +); + +TRACE_EVENT(ext4_mb_new_group_pa, + TP_PROTO(struct ext4_allocation_context *ac, + struct ext4_prealloc_space *pa), + + TP_ARGS(ac, pa), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, pa_pstart ) + __field( __u32, pa_len ) + __field( __u64, pa_lstart ) + + ), + + TP_fast_assign( + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->pa_pstart = pa->pa_pstart; + __entry->pa_len = pa->pa_len; + __entry->pa_lstart = pa->pa_lstart; + ), + + TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, + __entry->pa_len, __entry->pa_lstart) +); + +TRACE_EVENT(ext4_mb_release_inode_pa, + TP_PROTO(struct ext4_allocation_context *ac, + struct ext4_prealloc_space *pa, + unsigned long long block, unsigned int count), + + TP_ARGS(ac, pa, block, count), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, block ) + __field( __u32, count ) + + ), + + TP_fast_assign( + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->block = block; + __entry->count = count; + ), + + TP_printk("dev %s ino %lu block %llu count %u", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, + __entry->count) +); + +TRACE_EVENT(ext4_mb_release_group_pa, + TP_PROTO(struct ext4_allocation_context *ac, + struct ext4_prealloc_space *pa), + + TP_ARGS(ac, pa), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, pa_pstart ) + __field( __u32, pa_len ) + + ), + + TP_fast_assign( + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->pa_pstart = pa->pa_pstart; + __entry->pa_len = pa->pa_len; + ), + + TP_printk("dev %s pstart %llu len %u", + jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len) +); + +TRACE_EVENT(ext4_discard_preallocations, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + ), + + TP_printk("dev %s ino %lu", + jbd2_dev_to_name(__entry->dev), __entry->ino) +); + +TRACE_EVENT(ext4_mb_discard_preallocations, + TP_PROTO(struct super_block *sb, int needed), + + TP_ARGS(sb, needed), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, needed ) + + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->needed = needed; + ), + + TP_printk("dev %s needed %d", + jbd2_dev_to_name(__entry->dev), __entry->needed) +); + +TRACE_EVENT(ext4_request_blocks, + TP_PROTO(struct ext4_allocation_request *ar), + + TP_ARGS(ar), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( unsigned int, flags ) + __field( unsigned int, len ) + __field( __u64, logical ) + __field( __u64, goal ) + __field( __u64, lleft ) + __field( __u64, lright ) + __field( __u64, pleft ) + __field( __u64, pright ) + ), + + TP_fast_assign( + __entry->dev = ar->inode->i_sb->s_dev; + __entry->ino = ar->inode->i_ino; + __entry->flags = ar->flags; + __entry->len = ar->len; + __entry->logical = ar->logical; + __entry->goal = ar->goal; + __entry->lleft = ar->lleft; + __entry->lright = ar->lright; + __entry->pleft = ar->pleft; + __entry->pright = ar->pright; + ), + + TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, + __entry->len, + (unsigned long long) __entry->logical, + (unsigned long long) __entry->goal, + (unsigned long long) __entry->lleft, + (unsigned long long) __entry->lright, + (unsigned long long) __entry->pleft, + (unsigned long long) __entry->pright) +); + +TRACE_EVENT(ext4_allocate_blocks, + TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block), + + TP_ARGS(ar, block), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, block ) + __field( unsigned int, flags ) + __field( unsigned int, len ) + __field( __u64, logical ) + __field( __u64, goal ) + __field( __u64, lleft ) + __field( __u64, lright ) + __field( __u64, pleft ) + __field( __u64, pright ) + ), + + TP_fast_assign( + __entry->dev = ar->inode->i_sb->s_dev; + __entry->ino = ar->inode->i_ino; + __entry->block = block; + __entry->flags = ar->flags; + __entry->len = ar->len; + __entry->logical = ar->logical; + __entry->goal = ar->goal; + __entry->lleft = ar->lleft; + __entry->lright = ar->lright; + __entry->pleft = ar->pleft; + __entry->pright = ar->pright; + ), + + TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, + __entry->len, __entry->block, + (unsigned long long) __entry->logical, + (unsigned long long) __entry->goal, + (unsigned long long) __entry->lleft, + (unsigned long long) __entry->lright, + (unsigned long long) __entry->pleft, + (unsigned long long) __entry->pright) +); + +TRACE_EVENT(ext4_free_blocks, + TP_PROTO(struct inode *inode, __u64 block, unsigned long count, + int metadata), + + TP_ARGS(inode, block, count, metadata), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, block ) + __field( unsigned long, count ) + __field( int, metadata ) + + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->block = block; + __entry->count = count; + __entry->metadata = metadata; + ), + + TP_printk("dev %s ino %lu block %llu count %lu metadata %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, + __entry->count, __entry->metadata) +); + +TRACE_EVENT(ext4_sync_file, + TP_PROTO(struct file *file, struct dentry *dentry, int datasync), + + TP_ARGS(file, dentry, datasync), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ino_t, parent ) + __field( int, datasync ) + ), + + TP_fast_assign( + __entry->dev = dentry->d_inode->i_sb->s_dev; + __entry->ino = dentry->d_inode->i_ino; + __entry->datasync = datasync; + __entry->parent = dentry->d_parent->d_inode->i_ino; + ), + + TP_printk("dev %s ino %ld parent %ld datasync %d ", + jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent, + __entry->datasync) +); + +TRACE_EVENT(ext4_sync_fs, + TP_PROTO(struct super_block *sb, int wait), + + TP_ARGS(sb, wait), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, wait ) + + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->wait = wait; + ), + + TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev), + __entry->wait) +); + +#endif /* _TRACE_EXT4_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-70-g09d2 From e686307fdc84f249490e6c9da92fcb2424491f14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 17 Apr 2009 08:41:21 +0200 Subject: loop: use BIO list management functions Now that the bio list management stuff is generic, convert loop to use bio lists instead of its own private bio list implementation. Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Akinobu Mita Signed-off-by: Jens Axboe --- drivers/block/loop.c | 26 +++++++------------------- include/linux/bio.h | 2 +- include/linux/loop.h | 3 +-- 3 files changed, 9 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ddae8082589..9ca4bb01465 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -511,11 +511,7 @@ out: */ static void loop_add_bio(struct loop_device *lo, struct bio *bio) { - if (lo->lo_biotail) { - lo->lo_biotail->bi_next = bio; - lo->lo_biotail = bio; - } else - lo->lo_bio = lo->lo_biotail = bio; + bio_list_add(&lo->lo_bio_list, bio); } /* @@ -523,16 +519,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio) */ static struct bio *loop_get_bio(struct loop_device *lo) { - struct bio *bio; - - if ((bio = lo->lo_bio)) { - if (bio == lo->lo_biotail) - lo->lo_biotail = NULL; - lo->lo_bio = bio->bi_next; - bio->bi_next = NULL; - } - - return bio; + return bio_list_pop(&lo->lo_bio_list); } static int loop_make_request(struct request_queue *q, struct bio *old_bio) @@ -609,12 +596,13 @@ static int loop_thread(void *data) set_user_nice(current, -20); - while (!kthread_should_stop() || lo->lo_bio) { + while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { wait_event_interruptible(lo->lo_event, - lo->lo_bio || kthread_should_stop()); + !bio_list_empty(&lo->lo_bio_list) || + kthread_should_stop()); - if (!lo->lo_bio) + if (bio_list_empty(&lo->lo_bio_list)) continue; spin_lock_irq(&lo->lo_lock); bio = loop_get_bio(lo); @@ -841,7 +829,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - lo->lo_bio = lo->lo_biotail = NULL; + bio_list_init(&lo->lo_bio_list); /* * set queue make_request_fn, and add limits based on lower level diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b214fd672a..f37ca8c726b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio) } /* - * BIO list managment for use by remapping drivers (e.g. DM or MD). + * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow diff --git a/include/linux/loop.h b/include/linux/loop.h index 40725447f5e..66c194e2d9b 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -56,8 +56,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; + struct bio_list lo_bio_list; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; -- cgit v1.2.3-70-g09d2 From 214ae19104141404f18ad6651752eb38e3dd584e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078..846a1e13240 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v1.2.3-70-g09d2 From e69d800f7e8797a8e3423380ee9d8ca1cb90c388 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ide.h | 11 +++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2894577237b..c6e03485a63 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + sense_rq->rq_disk = rq->rq_disk; + + sense_rq->data = sense; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->data_len = sense_len; + + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +void ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + BUG_ON(!drive->sense_rq_armed); + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. * We queue a request sense packet command in the head of the request list. diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e13240..a69ccac5641 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v1.2.3-70-g09d2 From 068753203e6cd085664a62e0fc0636e19b148a12 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 48 ++++++++++++++---------------------------------- drivers/ide/ide-floppy.c | 4 +++- drivers/ide/ide-tape.c | 7 +++++-- include/linux/ide.h | 3 +-- 4 files changed, 23 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index c6e03485a63..972c522516f 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -79,34 +79,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_init_pc); -/* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->special = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - /* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. @@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq); /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = sense_rq->data; + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + ide_queue_sense_rq(drive, pc); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 94600331a27..d3302cc891e 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { + } else if (blk_special_request(rq) || blk_sense_request(rq)) { pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; @@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index aadf53cfac6..8324dfa78a3 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac5641..9e67ccac3c1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v1.2.3-70-g09d2 From 02e7cf8f848841ca21864ccd019e480b73c323b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 52 +++++++++++++++++++++++++++++-------------------- drivers/ide/ide-cd.c | 28 +++++++++++++------------- drivers/ide/ide-io.c | 3 +++ drivers/ide/ide-tape.c | 3 +++ include/linux/ide.h | 2 +- 5 files changed, 52 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 972c522516f..5cefe12f562 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) struct request_sense *sense = &drive->sense_data; struct request *sense_rq = &drive->sense_rq; unsigned int cmd_len, sense_len; + int err; debug_log("%s: enter\n", __func__); @@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); - sense_rq->rq_disk = rq->rq_disk; - sense_rq->data = sense; + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; - sense_rq->data_len = sense_len; - sense_rq->cmd_type = REQ_TYPE_SENSE; sense_rq->cmd_flags |= REQ_PREEMPT; @@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) } EXPORT_SYMBOL_GPL(ide_prep_sense); -void ide_queue_sense_rq(ide_drive_t *drive, void *special) +int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - BUG_ON(!drive->sense_rq_armed); + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } drive->sense_rq.special = special; drive->sense_rq_armed = false; @@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special) elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT, 0); + return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive) /* init pc from sense_rq */ ide_init_pc(pc); memcpy(pc->c, sense_rq->cmd, 12); - pc->buf = sense_rq->data; + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ pc->req_xfer = sense_rq->data_len; if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_sense_rq(drive, pc); + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) + if (drive->media == ide_tape && !rq->bio) done = ide_rq_bytes(rq); /* FIXME */ else done = blk_rq_bytes(rq); @@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { + if (drive->media == ide_tape && pc->bh) + done = drive->pc_io_buffers(drive, pc, bcount, write); + else { done = min_t(unsigned int, bcount, cmd->nleft); ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); } /* Update the current position */ diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 061d7bbcd34..673628790f1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* * For REQ_TYPE_SENSE, "rq->special" points to the original - * failed request + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ struct request *failed = (struct request *)rq->special; - struct request_sense *sense = &drive->sense_data; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { @@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - ide_queue_sense_rq(drive, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -412,8 +414,7 @@ end_request: hwif->rq = NULL; - ide_queue_sense_rq(drive, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 9b9e8b1aae5..3245c2dbda3 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8324dfa78a3..9b762a2d5d9 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -850,6 +850,9 @@ out: cmd.rq = rq; + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); + return ide_tape_issue_pc(drive, &cmd, pc); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67ccac3c1..1957461ac76 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v1.2.3-70-g09d2 From 29d1a4371035e01b0d079bc5aa88b50f5af7a566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo --- drivers/ide/ide-atapi.c | 17 ++++------------- drivers/ide/ide-tape.c | 1 - include/linux/ide.h | 12 ------------ 3 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index b9dd4503cbc..afe5a432387 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (drive->media == ide_tape && pc->bh) - done = drive->pc_io_buffers(drive, pc, bcount, write); - else { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2599579e417..8dfc68892d6 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461ac76..34c128f0a33 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v1.2.3-70-g09d2 From a7f557923441186a3cdbabc54f1bcacf42b63bf5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:17 +0900 Subject: block: kill blk_start_queueing() blk_start_queueing() is identical to __blk_run_queue() except that it doesn't check for recursion. None of the current users depends on blk_start_queueing() running request_fn directly. Replace usages of blk_start_queueing() with [__]blk_run_queue() and kill it. [ Impact: removal of mostly duplicate interface function ] Signed-off-by: Tejun Heo --- block/as-iosched.c | 6 +----- block/blk-core.c | 28 ++-------------------------- block/cfq-iosched.c | 6 +++--- block/elevator.c | 7 +++---- include/linux/blkdev.h | 1 - 5 files changed, 9 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/block/as-iosched.c b/block/as-iosched.c index c48fa670d22..45bd07059c2 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1312,12 +1312,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req, static void as_work_handler(struct work_struct *work) { struct as_data *ad = container_of(work, struct as_data, antic_work); - struct request_queue *q = ad->q; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queueing(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_run_queue(ad->q); } static int as_may_queue(struct request_queue *q, int rw) diff --git a/block/blk-core.c b/block/blk-core.c index 02f53bc00e4..8b4a0af7d69 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -433,9 +433,7 @@ EXPORT_SYMBOL(__blk_run_queue); * * Description: * Invoke request handling on this queue, if it has pending work to do. - * May be used to restart queueing when a request has completed. Also - * See @blk_start_queueing. - * + * May be used to restart queueing when a request has completed. */ void blk_run_queue(struct request_queue *q) { @@ -894,28 +892,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); -/** - * blk_start_queueing - initiate dispatch of requests to device - * @q: request queue to kick into gear - * - * This is basically a helper to remove the need to know whether a queue - * is plugged or not if someone just wants to initiate dispatch of requests - * for this queue. Should be used to start queueing on a device outside - * of ->request_fn() context. Also see @blk_run_queue. - * - * The queue lock must be held with interrupts disabled. - */ -void blk_start_queueing(struct request_queue *q) -{ - if (!blk_queue_plugged(q)) { - if (unlikely(blk_queue_stopped(q))) - return; - q->request_fn(q); - } else - __generic_unplug_device(q); -} -EXPORT_SYMBOL(blk_start_queueing); - /** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted @@ -984,7 +960,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, drive_stat_acct(rq, 1); __elv_add_request(q, rq, where, 0); - blk_start_queueing(q); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a55a9bd75bd..def0c698a4b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2088,7 +2088,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || cfqd->busy_queues > 1) { del_timer(&cfqd->idle_slice_timer); - blk_start_queueing(cfqd->queue); + __blk_run_queue(cfqd->queue); } cfq_mark_cfqq_must_dispatch(cfqq); } @@ -2100,7 +2100,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - blk_start_queueing(cfqd->queue); + __blk_run_queue(cfqd->queue); } } @@ -2345,7 +2345,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - blk_start_queueing(q); + __blk_run_queue(cfqd->queue); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 7073a907257..2e0fb21485b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -599,7 +599,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - blk_start_queueing(q); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -643,8 +643,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - blk_remove_plug(q); - blk_start_queueing(q); + __blk_run_queue(q); break; case ELEVATOR_INSERT_SORT: @@ -971,7 +970,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - blk_start_queueing(q); + __blk_run_queue(q); } } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2755d5c6da2..12e20de44b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -797,7 +797,6 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); -extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3-70-g09d2 From 5efccd17ceb0fc43837a331297c2c407969d7201 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: reorder request completion functions Reorder request completion functions such that * All request completion functions are located together. * Functions which are used by only one caller is put right above the caller. * end_request() is put after other completion functions but before blk_update_request(). This change is for completion function cleanup which will follow. [ Impact: cleanup, code reorganization ] Signed-off-by: Tejun Heo --- block/blk-core.c | 144 ++++++++++++++++++++++++------------------------- include/linux/blkdev.h | 16 +++--- 2 files changed, 80 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index cf10dfcda99..406a93e526b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1683,6 +1683,35 @@ static void blk_account_io_done(struct request *req) } } +/** + * blk_rq_bytes - Returns bytes left to complete in the entire request + * @rq: the request being processed + **/ +unsigned int blk_rq_bytes(struct request *rq) +{ + if (blk_fs_request(rq)) + return rq->hard_nr_sectors << 9; + + return rq->data_len; +} +EXPORT_SYMBOL_GPL(blk_rq_bytes); + +/** + * blk_rq_cur_bytes - Returns bytes left to complete in the current segment + * @rq: the request being processed + **/ +unsigned int blk_rq_cur_bytes(struct request *rq) +{ + if (blk_fs_request(rq)) + return rq->current_nr_sectors << 9; + + if (rq->bio) + return rq->bio->bi_size; + + return rq->data_len; +} +EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); + /** * __end_that_request_first - end I/O on a request * @req: the request being processed @@ -1797,6 +1826,22 @@ static int __end_that_request_first(struct request *req, int error, return 1; } +static int end_that_request_data(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) +{ + if (rq->bio) { + if (__end_that_request_first(rq, error, nr_bytes)) + return 1; + + /* Bidi request must be completed as a whole */ + if (blk_bidi_rq(rq) && + __end_that_request_first(rq->next_rq, error, bidi_bytes)) + return 1; + } + + return 0; +} + /* * queue lock must be held */ @@ -1825,78 +1870,6 @@ static void end_that_request_last(struct request *req, int error) } } -/** - * blk_rq_bytes - Returns bytes left to complete in the entire request - * @rq: the request being processed - **/ -unsigned int blk_rq_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->hard_nr_sectors << 9; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_bytes); - -/** - * blk_rq_cur_bytes - Returns bytes left to complete in the current segment - * @rq: the request being processed - **/ -unsigned int blk_rq_cur_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->current_nr_sectors << 9; - - if (rq->bio) - return rq->bio->bi_size; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); - -/** - * end_request - end I/O on the current segment of the request - * @req: the request being processed - * @uptodate: error value or %0/%1 uptodate flag - * - * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. - **/ -void end_request(struct request *req, int uptodate) -{ - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_request(req, error, req->hard_cur_sectors << 9); -} -EXPORT_SYMBOL(end_request); - -static int end_that_request_data(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) -{ - if (rq->bio) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; - - /* Bidi request must be completed as a whole */ - if (blk_bidi_rq(rq) && - __end_that_request_first(rq->next_rq, error, bidi_bytes)) - return 1; - } - - return 0; -} - /** * blk_end_io - Generic end_io function to complete a request. * @rq: the request being processed @@ -2006,6 +1979,33 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, } EXPORT_SYMBOL_GPL(blk_end_bidi_request); +/** + * end_request - end I/O on the current segment of the request + * @req: the request being processed + * @uptodate: error value or %0/%1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Use blk_end_request() or __blk_end_request() to end a request. + **/ +void end_request(struct request *req, int uptodate) +{ + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_request(req, error, req->hard_cur_sectors << 9); +} +EXPORT_SYMBOL(end_request); + /** * blk_update_request - Special helper function for request stacking drivers * @rq: the request being processed diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12e20de44b6..156ffd9de96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -831,6 +831,14 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); +/* + * blk_end_request() takes bytes instead of sectors as a complete size. + * blk_rq_bytes() returns bytes left to complete in the entire request. + * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + */ +extern unsigned int blk_rq_bytes(struct request *rq); +extern unsigned int blk_rq_cur_bytes(struct request *rq); + /* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with @@ -857,14 +865,6 @@ extern void blk_abort_queue(struct request_queue *); extern void blk_update_request(struct request *rq, int error, unsigned int nr_bytes); -/* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. - */ -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3-70-g09d2 From 0b302d5aa7975006fa2ec3d66386610b9b36c669 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: kill blk_end_request_callback() With recent IDE updates, blk_end_request_callback() doesn't have any user now. Kill it. [ Impact: removal of unused convoluted interface ] Signed-off-by: Tejun Heo --- block/blk-core.c | 48 +++--------------------------------------------- include/linux/blkdev.h | 3 --- 2 files changed, 3 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 678ede23ed0..2f277ea0e59 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1971,10 +1971,6 @@ static void end_that_request_last(struct request *req, int error) * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq - * @drv_callback: function called between completion of bios in the request - * and completion of the request. - * If the callback returns non %0, this helper returns without - * completion of the request. * * Description: * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. @@ -1985,8 +1981,7 @@ static void end_that_request_last(struct request *req, int error) * %1 - this request is not freed yet, it still has pending buffers. **/ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes, - int (drv_callback)(struct request *)) + unsigned int bidi_bytes) { struct request_queue *q = rq->q; unsigned long flags = 0UL; @@ -1994,10 +1989,6 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) return 1; - /* Special feature for tricky drivers */ - if (drv_callback && drv_callback(rq)) - return 1; - add_disk_randomness(rq->rq_disk); spin_lock_irqsave(q->queue_lock, flags); @@ -2023,7 +2014,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, **/ int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - return blk_end_io(rq, error, nr_bytes, 0, NULL); + return blk_end_io(rq, error, nr_bytes, 0); } EXPORT_SYMBOL_GPL(blk_end_request); @@ -2070,7 +2061,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request); int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { - return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); + return blk_end_io(rq, error, nr_bytes, bidi_bytes); } EXPORT_SYMBOL_GPL(blk_end_bidi_request); @@ -2131,39 +2122,6 @@ void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) } EXPORT_SYMBOL_GPL(blk_update_request); -/** - * blk_end_request_callback - Special helper function for tricky drivers - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * @drv_callback: function called between completion of bios in the request - * and completion of the request. - * If the callback returns non %0, this helper returns without - * completion of the request. - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * This special helper function is used only for existing tricky drivers. - * (e.g. cdrom_newpc_intr() of ide-cd) - * This interface will be removed when such drivers are rewritten. - * Don't use this interface in other places anymore. - * - * Return: - * %0 - we are done with this request - * %1 - this request is not freed yet. - * this request still has pending buffers or - * the driver doesn't want to finish this request yet. - **/ -int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)) -{ - return blk_end_io(rq, error, nr_bytes, 0, drv_callback); -} -EXPORT_SYMBOL_GPL(blk_end_request_callback); - void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 156ffd9de96..1fa9dcf9aa6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -855,9 +855,6 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v1.2.3-70-g09d2 From 2e60e02297cf54e367567f2d85b2ca56b1c4a906 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: clean up request completion API Request completion has gone through several changes and became a bit messy over the time. Clean it up. 1. end_that_request_data() is a thin wrapper around end_that_request_data_first() which checks whether bio is NULL before doing anything and handles bidi completion. blk_update_request() is a thin wrapper around end_that_request_data() which clears nr_sectors on the last iteration but doesn't use the bidi completion. Clean it up by moving the initial bio NULL check and nr_sectors clearing on the last iteration into end_that_request_data() and renaming it to blk_update_request(), which makes blk_end_io() the only user of end_that_request_data(). Collapse end_that_request_data() into blk_end_io(). 2. There are four visible completion variants - blk_end_request(), __blk_end_request(), blk_end_bidi_request() and end_request(). blk_end_request() and blk_end_bidi_request() uses blk_end_request() as the backend but __blk_end_request() and end_request() use separate implementation in __blk_end_request() due to different locking rules. blk_end_bidi_request() is identical to blk_end_io(). Collapse blk_end_io() into blk_end_bidi_request(), separate out request update into internal helper blk_update_bidi_request() and add __blk_end_bidi_request(). Redefine [__]blk_end_request() as thin inline wrappers around [__]blk_end_bidi_request(). 3. As the whole request issue/completion usages are about to be modified and audited, it's a good chance to convert completion functions return bool which better indicates the intended meaning of return values. 4. The function name end_that_request_last() is from the days when it was a public interface and slighly confusing. Give it a proper internal name - blk_finish_request(). 5. Add description explaning that blk_end_bidi_request() can be safely used for uni requests as suggested by Boaz Harrosh. The only visible behavior change is from #1. nr_sectors counts are cleared after the final iteration no matter which function is used to complete the request. I couldn't find any place where the code assumes those nr_sectors counters contain the values for the last segment and this change is good as it makes the API much more consistent as the end result is now same whether a request is completed using [__]blk_end_request() alone or in combination with blk_update_request(). API further cleaned up per Christoph's suggestion. [ Impact: cleanup, rq->*nr_sectors always updated after req completion ] Signed-off-by: Tejun Heo Reviewed-by: Boaz Harrosh Cc: Christoph Hellwig --- block/blk-core.c | 226 ++++++++++++++++--------------------------------- include/linux/blkdev.h | 94 +++++++++++++++++--- 2 files changed, 157 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 2f277ea0e59..89cc05d9a7a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1808,25 +1808,35 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) } /** - * __end_that_request_first - end I/O on a request - * @req: the request being processed + * blk_update_request - Special helper function for request stacking drivers + * @rq: the request being processed * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete + * @nr_bytes: number of bytes to complete @rq * * Description: - * Ends I/O on a number of bytes attached to @req, and sets it up - * for the next range of segments (if any) in the cluster. + * Ends I/O on a number of bytes attached to @rq, but doesn't complete + * the request structure even if @rq doesn't have leftover. + * If @rq has leftover, sets it up for the next range of segments. + * + * This special helper function is only for request stacking drivers + * (e.g. request-based dm) so that they can handle partial completion. + * Actual device drivers should use blk_end_request instead. + * + * Passing the result of blk_rq_bytes() as @nr_bytes guarantees + * %false return from this function. * * Return: - * %0 - we are done with this request, call end_that_request_last() - * %1 - still buffers pending for this request + * %false - this request doesn't have any more data + * %true - this request has more data **/ -static int __end_that_request_first(struct request *req, int error, - int nr_bytes) +bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) { int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; + if (!req->bio) + return false; + trace_block_rq_complete(req->q, req); /* @@ -1903,8 +1913,16 @@ static int __end_that_request_first(struct request *req, int error, /* * completely done */ - if (!req->bio) - return 0; + if (!req->bio) { + /* + * Reset counters so that the request stacking driver + * can find how many bytes remain in the request + * later. + */ + req->nr_sectors = req->hard_nr_sectors = 0; + req->current_nr_sectors = req->hard_cur_sectors = 0; + return false; + } /* * if the request wasn't completed, update state @@ -1918,29 +1936,31 @@ static int __end_that_request_first(struct request *req, int error, blk_recalc_rq_sectors(req, total_bytes >> 9); blk_recalc_rq_segments(req); - return 1; + return true; } +EXPORT_SYMBOL_GPL(blk_update_request); -static int end_that_request_data(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) +static bool blk_update_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes) { - if (rq->bio) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; + if (blk_update_request(rq, error, nr_bytes)) + return true; - /* Bidi request must be completed as a whole */ - if (blk_bidi_rq(rq) && - __end_that_request_first(rq->next_rq, error, bidi_bytes)) - return 1; - } + /* Bidi request must be completed as a whole */ + if (unlikely(blk_bidi_rq(rq)) && + blk_update_request(rq->next_rq, error, bidi_bytes)) + return true; - return 0; + add_disk_randomness(rq->rq_disk); + + return false; } /* * queue lock must be held */ -static void end_that_request_last(struct request *req, int error) +static void blk_finish_request(struct request *req, int error) { if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1966,161 +1986,65 @@ static void end_that_request_last(struct request *req, int error) } /** - * blk_end_io - Generic end_io function to complete a request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete @rq - * @bidi_bytes: number of bytes to complete @rq->next_rq + * blk_end_bidi_request - Complete a bidi request + * @rq: the request to complete + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete @rq + * @bidi_bytes: number of bytes to complete @rq->next_rq * * Description: * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. - * If @rq has leftover, sets it up for the next range of segments. + * Drivers that supports bidi can safely call this member for any + * type of request, bidi or uni. In the later case @bidi_bytes is + * just ignored. * * Return: - * %0 - we are done with this request - * %1 - this request is not freed yet, it still has pending buffers. + * %false - we are done with this request + * %true - still buffers pending for this request **/ -static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes) +bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { struct request_queue *q = rq->q; - unsigned long flags = 0UL; - - if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) - return 1; + unsigned long flags; - add_disk_randomness(rq->rq_disk); + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + return true; spin_lock_irqsave(q->queue_lock, flags); - end_that_request_last(rq, error); + blk_finish_request(rq, error); spin_unlock_irqrestore(q->queue_lock, flags); - return 0; -} - -/** - * blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) -{ - return blk_end_io(rq, error, nr_bytes, 0); -} -EXPORT_SYMBOL_GPL(blk_end_request); - -/** - * __blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Must be called with queue lock held unlike blk_end_request(). - * - * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) -{ - if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) - return 1; - - add_disk_randomness(rq->rq_disk); - - end_that_request_last(rq, error); - - return 0; + return false; } -EXPORT_SYMBOL_GPL(__blk_end_request); +EXPORT_SYMBOL_GPL(blk_end_bidi_request); /** - * blk_end_bidi_request - Helper function for drivers to complete bidi request. - * @rq: the bidi request being processed + * __blk_end_bidi_request - Complete a bidi request with queue lock held + * @rq: the request to complete * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * * Description: - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. + * Identical to blk_end_bidi_request() except that queue lock is + * assumed to be locked on entry and remains so on return. * * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes) -{ - return blk_end_io(rq, error, nr_bytes, bidi_bytes); -} -EXPORT_SYMBOL_GPL(blk_end_bidi_request); - -/** - * end_request - end I/O on the current segment of the request - * @req: the request being processed - * @uptodate: error value or %0/%1 uptodate flag - * - * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. + * %false - we are done with this request + * %true - still buffers pending for this request **/ -void end_request(struct request *req, int uptodate) +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { - int error = 0; + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + return true; - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; + blk_finish_request(rq, error); - __blk_end_request(req, error, req->hard_cur_sectors << 9); -} -EXPORT_SYMBOL(end_request); - -/** - * blk_update_request - Special helper function for request stacking drivers - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete @rq - * - * Description: - * Ends I/O on a number of bytes attached to @rq, but doesn't complete - * the request structure even if @rq doesn't have leftover. - * If @rq has leftover, sets it up for the next range of segments. - * - * This special helper function is only for request stacking drivers - * (e.g. request-based dm) so that they can handle partial completion. - * Actual device drivers should use blk_end_request instead. - */ -void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) -{ - if (!end_that_request_data(rq, error, nr_bytes, 0)) { - /* - * These members are not updated in end_that_request_data() - * when all bios are completed. - * Update them so that the request stacking driver can find - * how many bytes remain in the request later. - */ - rq->nr_sectors = rq->hard_nr_sectors = 0; - rq->current_nr_sectors = rq->hard_cur_sectors = 0; - } + return false; } -EXPORT_SYMBOL_GPL(blk_update_request); +EXPORT_SYMBOL_GPL(__blk_end_bidi_request); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1fa9dcf9aa6..501f6845cc7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -840,27 +840,97 @@ extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); /* - * blk_end_request() and friends. - * __blk_end_request() and end_request() must be called with - * the request queue spinlock acquired. + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + * + * blk_end_request() and friends. __blk_end_request() and + * end_request() must be called with the request queue spinlock + * acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. */ -extern int blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes); -extern void end_request(struct request *, int); +extern bool blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); +extern bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); +extern bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); + +/** + * blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Ends I/O on a number of bytes attached to @rq. + * If @rq has leftover, sets it up for the next range of segments. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * __blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Must be called with queue lock held unlike blk_end_request(). + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return __blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * end_request - end I/O on the current segment of the request + * @rq: the request being processed + * @uptodate: error value or %0/%1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Use blk_end_request() or __blk_end_request() to end a request. + **/ +static inline void end_request(struct request *rq, int uptodate) +{ + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); +} + extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); -extern void blk_update_request(struct request *rq, int error, - unsigned int nr_bytes); /* * Access functions for manipulating queue properties -- cgit v1.2.3-70-g09d2 From 40cbbb781d3eba5d6ac0860db078af490e5c7c6b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: implement and use [__]blk_end_request_all() There are many [__]blk_end_request() call sites which call it with full request length and expect full completion. Many of them ensure that the request actually completes by doing BUG_ON() the return value, which is awkward and error-prone. This patch adds [__]blk_end_request_all() which takes @rq and @error and fully completes the request. BUG_ON() is added to to ensure that this actually happens. Most conversions are simple but there are a few noteworthy ones. * cdrom/viocd: viocd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/block/dasd: dasd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/char/tape_block: tapeblock_end_request() replaced with direct calls to blk_end_request_all(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Russell King Cc: Stephen Rothwell Cc: Mike Miller Cc: Martin Schwidefsky Cc: Jeff Garzik Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: James Bottomley --- arch/arm/plat-omap/mailbox.c | 11 +++-------- block/blk-barrier.c | 9 ++------- block/blk-core.c | 2 +- block/elevator.c | 2 +- drivers/block/cpqarray.c | 3 +-- drivers/block/sx8.c | 3 +-- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 4 +--- drivers/cdrom/gdrom.c | 2 +- drivers/cdrom/viocd.c | 25 ++++--------------------- drivers/memstick/core/mspro_block.c | 2 +- drivers/s390/block/dasd.c | 17 ++++------------- drivers/s390/char/tape_block.c | 15 ++++----------- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++++++ 15 files changed, 58 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index 0abfbaa5987..cf81bad8aec 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -192,8 +192,7 @@ static void mbox_tx_work(struct work_struct *work) } spin_lock(q->queue_lock); - if (__blk_end_request(rq, 0, 0)) - BUG(); + __blk_end_request_all(rq, 0); spin_unlock(q->queue_lock); } } @@ -224,10 +223,7 @@ static void mbox_rx_work(struct work_struct *work) break; msg = (mbox_msg_t) rq->data; - - if (blk_end_request(rq, 0, 0)) - BUG(); - + blk_end_request_all(rq, 0); mbox->rxq->callback((void *)msg); } } @@ -337,8 +333,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf) *p = (mbox_msg_t) rq->data; - if (blk_end_request(rq, 0, 0)) - BUG(); + blk_end_request_all(rq, 0); if (unlikely(mbox_seq_test(mbox, *p))) { pr_info("mbox: Illegal seq bit!(%08x) ignored\n", *p); diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 20b4111fa05..c8d087655ef 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) */ q->ordseq = 0; rq = q->orig_bar_rq; - - if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) - BUG(); - + __blk_end_request_all(rq, q->orderr); return true; } @@ -252,9 +249,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) * with prejudice. */ elv_dequeue_request(q, rq); - if (__blk_end_request(rq, -EOPNOTSUPP, - blk_rq_bytes(rq))) - BUG(); + __blk_end_request_all(rq, -EOPNOTSUPP); *rqp = NULL; return false; } diff --git a/block/blk-core.c b/block/blk-core.c index b84250d3019..0520cc70458 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1780,7 +1780,7 @@ struct request *elv_next_request(struct request_queue *q) break; } else if (ret == BLKPREP_KILL) { rq->cmd_flags |= REQ_QUIET; - __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + __blk_end_request_all(rq, -EIO); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; diff --git a/block/elevator.c b/block/elevator.c index b03b8752e18..1af5d9f04af 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -810,7 +810,7 @@ void elv_abort_queue(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; trace_block_rq_abort(q, rq); - __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + __blk_end_request_all(rq, -EIO); } } EXPORT_SYMBOL(elv_abort_queue); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index ca268ca1115..488a8f4a60a 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1024,8 +1024,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) cmd->req.sg[i].size, ddir); DBGPX(printk("Done with %p\n", rq);); - if (__blk_end_request(rq, error, blk_rq_bytes(rq))) - BUG(); + __blk_end_request_all(rq, error); } /* diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index ff0448e4bf0..60e85bb6f79 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -749,8 +749,7 @@ static inline void carm_end_request_queued(struct carm_host *host, struct request *req = crq->rq; int rc; - rc = __blk_end_request(req, error, blk_rq_bytes(req)); - assert(rc == 0); + __blk_end_request_all(req, error); rc = carm_put_request(host, crq); assert(rc == 0); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5d34764c8a8..50745e64414 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -62,7 +62,7 @@ static void blk_done(struct virtqueue *vq) break; } - __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); + __blk_end_request_all(vbr->req, error); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8f905089b72..cd6cfe3b51e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -551,7 +551,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) for (i = info->ring.rsp_cons; i != rp; i++) { unsigned long id; - int ret; bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; @@ -578,8 +577,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - ret = __blk_end_request(req, error, blk_rq_bytes(req)); - BUG_ON(ret); + __blk_end_request_all(req, error); break; default: BUG(); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 2eecb779437..fee9a9e83fc 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -632,7 +632,7 @@ static void gdrom_readdisk_dma(struct work_struct *work) * before handling ending the request */ spin_lock(&gdrom_lock); list_del_init(&req->queuelist); - __blk_end_request(req, err, blk_rq_bytes(req)); + __blk_end_request_all(req, err); } spin_unlock(&gdrom_lock); kfree(read_command); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 13929356135..cc3efa096e1 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -291,23 +291,6 @@ static int send_request(struct request *req) return 0; } -static void viocd_end_request(struct request *req, int error) -{ - int nsectors = req->hard_nr_sectors; - - /* - * Make sure it's fully ended, and ensure that we process - * at least one sector. - */ - if (blk_pc_request(req)) - nsectors = (req->data_len + 511) >> 9; - if (!nsectors) - nsectors = 1; - - if (__blk_end_request(req, error, nsectors << 9)) - BUG(); -} - static int rwreq; static void do_viocd_request(struct request_queue *q) @@ -316,11 +299,11 @@ static void do_viocd_request(struct request_queue *q) while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) { if (!blk_fs_request(req)) - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); else if (send_request(req) < 0) { printk(VIOCD_KERN_WARNING "unable to send message to OS/400!"); - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } else rwreq++; } @@ -531,9 +514,9 @@ return_complete: "with rc %d:0x%04X: %s\n", req, event->xRc, bevent->sub_result, err->msg); - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } else - viocd_end_request(req, 0); + __blk_end_request_all(req, 0); /* restart handling of incoming requests */ spin_unlock_irqrestore(&viocd_reqlock, flags); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index de143deb06f..a41634699f8 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -826,7 +826,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = elv_next_request(q)) != NULL) - __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); + __blk_end_request_all(req, -ENODEV); return; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d1815272c43..fabec95686b 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1613,15 +1613,6 @@ void dasd_block_clear_timer(struct dasd_block *block) del_timer(&block->timer); } -/* - * posts the buffer_cache about a finalized request - */ -static inline void dasd_end_request(struct request *req, int error) -{ - if (__blk_end_request(req, error, blk_rq_bytes(req))) - BUG(); -} - /* * Process finished error recovery ccw. */ @@ -1676,7 +1667,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "Rejecting write request %p", req); blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); continue; } cqr = basedev->discipline->build_cp(basedev, block, req); @@ -1705,7 +1696,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "on request %p", PTR_ERR(cqr), req); blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); continue; } /* @@ -1731,7 +1722,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) status = cqr->block->base->discipline->free_cp(cqr, req); if (status <= 0) error = status ? status : -EIO; - dasd_end_request(req, error); + __blk_end_request_all(req, error); } /* @@ -2040,7 +2031,7 @@ static void dasd_flush_request_queue(struct dasd_block *block) spin_lock_irq(&block->request_queue_lock); while ((req = elv_next_request(block->request_queue))) { blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } spin_unlock_irq(&block->request_queue_lock); } diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index f32e89e7c4f..86596d3813b 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -73,13 +73,6 @@ tapeblock_trigger_requeue(struct tape_device *device) /* * Post finished request. */ -static void -tapeblock_end_request(struct request *req, int error) -{ - if (blk_end_request(req, error, blk_rq_bytes(req))) - BUG(); -} - static void __tapeblock_end_request(struct tape_request *ccw_req, void *data) { @@ -90,7 +83,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data) device = ccw_req->device; req = (struct request *) data; - tapeblock_end_request(req, (ccw_req->rc == 0) ? 0 : -EIO); + blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO); if (ccw_req->rc == 0) /* Update position. */ device->blk_data.block_position = @@ -118,7 +111,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req) ccw_req = device->discipline->bread(device, req); if (IS_ERR(ccw_req)) { DBF_EVENT(1, "TBLOCK: bread failed\n"); - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); return PTR_ERR(ccw_req); } ccw_req->callback = __tapeblock_end_request; @@ -131,7 +124,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req) * Start/enqueueing failed. No retries in * this case. */ - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); device->discipline->free_bread(ccw_req); } @@ -177,7 +170,7 @@ tapeblock_requeue(struct work_struct *work) { DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); blkdev_dequeue_request(req); spin_unlock_irq(&device->blk_data.request_queue_lock); - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); spin_lock_irq(&device->blk_data.request_queue_lock); continue; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d1cb64ad1a3..756ac7c93de 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -922,7 +922,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) if (driver_byte(result) & DRIVER_SENSE) scsi_print_sense("", cmd); } - blk_end_request(req, -EIO, blk_rq_bytes(req)); + blk_end_request_all(req, -EIO); scsi_next_command(cmd); break; case ACTION_REPREP: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 501f6845cc7..e33c8356b3d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -882,6 +882,22 @@ static inline bool blk_end_request(struct request *rq, int error, return blk_end_bidi_request(rq, error, nr_bytes, 0); } +/** + * blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. + */ +static inline void blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed @@ -901,6 +917,22 @@ static inline bool __blk_end_request(struct request *rq, int error, return __blk_end_bidi_request(rq, error, nr_bytes, 0); } +/** + * __blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. Must be called with queue lock held. + */ +static inline void __blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = __blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + /** * end_request - end I/O on the current segment of the request * @rq: the request being processed -- cgit v1.2.3-70-g09d2 From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: replace end_request() with [__]blk_end_request_cur() end_request() has been kept around for backward compatibility; however, it's about time for it to go away. * There aren't too many users left. * Its use of @updtodate is pretty confusing. * In some cases, newer code ends up using mixture of end_request() and [__]blk_end_request[_all](), which is way too confusing. So, add [__]blk_end_request_cur() and replace end_request() with it. Most conversions are straightforward. Noteworthy ones are... * paride/pcd: next_request() updated to take 0/-errno instead of 1/0. * paride/pf: pf_end_request() and next_request() updated to take 0/-errno instead of 1/0. * xd: xd_readwrite() updated to return 0/-errno instead of 1/0. * mtd/mtd_blkdevs: blktrans_discard_request() updated to return 0/-errno instead of 1/0. Unnecessary local variable res initialization removed from mtd_blktrans_thread(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Joerg Dorchain Acked-by: Geert Uytterhoeven Acked-by: Grant Likely Acked-by: Laurent Vivier Cc: Tim Waugh Cc: Stephen Rothwell Cc: Paul Mackerras Cc: Jeremy Fitzhardinge Cc: Markus Lidel Cc: David Woodhouse Cc: Pete Zaitcev Cc: unsik Kim --- drivers/block/amiflop.c | 10 ++++----- drivers/block/ataflop.c | 14 ++++++------- drivers/block/hd.c | 14 ++++++------- drivers/block/mg_disk.c | 16 +++++++------- drivers/block/paride/pcd.c | 12 +++++------ drivers/block/paride/pd.c | 5 +++-- drivers/block/paride/pf.c | 28 ++++++++++++------------- drivers/block/ps3disk.c | 6 +++--- drivers/block/swim.c | 14 ++++++------- drivers/block/swim3.c | 26 +++++++++++------------ drivers/block/xd.c | 15 +++++++------- drivers/block/xen-blkfront.c | 2 +- drivers/block/xsysace.c | 4 ++-- drivers/block/z2ram.c | 4 ++-- drivers/cdrom/gdrom.c | 6 +++--- drivers/message/i2o/i2o_block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 22 ++++++++++---------- drivers/sbus/char/jsflash.c | 8 +++---- include/linux/blkdev.h | 46 ++++++++++++++++++++--------------------- 19 files changed, 128 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 8df436ff706..b99a2a606d0 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1359,7 +1359,7 @@ static void redo_fd_request(void) #endif block = CURRENT->sector + cnt; if ((int)block > floppy->blocks) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1373,11 +1373,11 @@ static void redo_fd_request(void) if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) { printk(KERN_WARNING "do_fd_request: unknown command\n"); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } if (get_track(drive, track) == -1) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1391,7 +1391,7 @@ static void redo_fd_request(void) /* keep the drive spinning while writes are scheduled */ if (!fd_motor_on(drive)) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } /* @@ -1410,7 +1410,7 @@ static void redo_fd_request(void) CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); goto repeat; } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4234c11c1e4..44a8702136a 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -612,7 +612,7 @@ static void fd_error( void ) CURRENT->errors++; if (CURRENT->errors >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); } else if (CURRENT->errors == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); @@ -734,7 +734,7 @@ static void do_fd_action( int drive ) /* all sectors finished */ CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); redo_fd_request(); return; } @@ -1141,7 +1141,7 @@ static void fd_rwsec_done1(int status) /* all sectors finished */ CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); redo_fd_request(); } return; @@ -1414,7 +1414,7 @@ repeat: if (!UD.connected) { /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1430,12 +1430,12 @@ repeat: /* user supplied disk type */ if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } type = minor2disktype[type].index; @@ -1445,7 +1445,7 @@ repeat: } if (CURRENT->sector + 1 > UDT->blocks) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } diff --git a/drivers/block/hd.c b/drivers/block/hd.c index baaa9e486e5..5cb300b81c6 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -410,7 +410,7 @@ static void bad_rw_intr(void) if (req != NULL) { struct hd_i_struct *disk = req->rq_disk->private_data; if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); disk->special_op = disk->recalibrate = 1; } else if (req->errors % RESET_FREQ == 0) reset = 1; @@ -466,7 +466,7 @@ ok_to_read: req->buffer+512); #endif if (req->current_nr_sectors <= 0) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&read_intr); return; @@ -505,7 +505,7 @@ ok_to_write: --req->current_nr_sectors; req->buffer += 512; if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); @@ -548,7 +548,7 @@ static void hd_times_out(unsigned long dummy) #ifdef DEBUG printk("%s: too many errors\n", name); #endif - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); } hd_request(); spin_unlock_irq(hd_queue->queue_lock); @@ -563,7 +563,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) } if (disk->head > 16) { printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } disk->special_op = 0; return 1; @@ -607,7 +607,7 @@ repeat: ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", req->rq_disk->disk_name, block, nsect); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); goto repeat; } @@ -647,7 +647,7 @@ repeat: break; default: printk("unknown hd-command\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index f3898353d0a..408c2bd8a43 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -285,7 +285,7 @@ static void mg_bad_rw_intr(struct mg_host *host) if (req != NULL) if (++req->errors >= MG_MAX_ERRORS || host->error == MG_ERR_TIMEOUT) - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } static unsigned int mg_out(struct mg_host *host, @@ -351,7 +351,7 @@ static void mg_read(struct request *req) if (req->current_nr_sectors <= 0) { MG_DBG("remain : %d sects\n", remains); - end_request(req, 1); + __blk_end_request_cur(req, 0); if (remains > 0) req = elv_next_request(host->breq); } @@ -395,7 +395,7 @@ static void mg_write(struct request *req) if (req->current_nr_sectors <= 0) { MG_DBG("remain : %d sects\n", remains); - end_request(req, 1); + __blk_end_request_cur(req, 0); if (remains > 0) req = elv_next_request(host->breq); } @@ -448,7 +448,7 @@ ok_to_read: /* let know if current segment done */ if (req->current_nr_sectors <= 0) - end_request(req, 1); + __blk_end_request_cur(req, 0); /* set handler if read remains */ if (i > 0) { @@ -497,7 +497,7 @@ ok_to_write: /* let know if current segment or all done */ if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); + __blk_end_request_cur(req, 0); /* write 1 sector and set handler if remains */ if (i > 0) { @@ -563,7 +563,7 @@ static void mg_request_poll(struct request_queue *q) default: printk(KERN_WARNING "%s:%d unknown command\n", __func__, __LINE__); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } } @@ -617,7 +617,7 @@ static unsigned int mg_issue_req(struct request *req, default: printk(KERN_WARNING "%s:%d unknown command\n", __func__, __LINE__); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } return MG_ERR_NONE; @@ -655,7 +655,7 @@ static void mg_request(struct request_queue *q) "%s: bad access: sector=%d, count=%d\n", req->rq_disk->disk_name, sect_num, sect_cnt); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index e91d4b4b014..9fd57c2aa46 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -735,16 +735,16 @@ static void do_pcd_request(struct request_queue * q) ps_set_intr(do_pcd_read, NULL, 0, nice); return; } else - end_request(pcd_req, 0); + __blk_end_request_cur(pcd_req, -EIO); } } -static inline void next_request(int success) +static inline void next_request(int err) { unsigned long saved_flags; spin_lock_irqsave(&pcd_lock, saved_flags); - end_request(pcd_req, success); + __blk_end_request_cur(pcd_req, err); pcd_busy = 0; do_pcd_request(pcd_queue); spin_unlock_irqrestore(&pcd_lock, saved_flags); @@ -781,7 +781,7 @@ static void pcd_start(void) if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) { pcd_bufblk = -1; - next_request(0); + next_request(-EIO); return; } @@ -796,7 +796,7 @@ static void do_pcd_read(void) pcd_retries = 0; pcd_transfer(); if (!pcd_count) { - next_request(1); + next_request(0); return; } @@ -815,7 +815,7 @@ static void do_pcd_read_drq(void) return; } pcd_bufblk = -1; - next_request(0); + next_request(-EIO); return; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 9299455b0af..0732df4e901 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -410,7 +410,8 @@ static void run_fsm(void) pd_claimed = 0; phase = NULL; spin_lock_irqsave(&pd_lock, saved_flags); - end_request(pd_req, res); + __blk_end_request_cur(pd_req, + res == Ok ? 0 : -EIO); pd_req = elv_next_request(pd_queue); if (!pd_req) stop = 1; @@ -477,7 +478,7 @@ static int pd_next_buf(void) if (pd_count) return 0; spin_lock_irqsave(&pd_lock, saved_flags); - end_request(pd_req, 1); + __blk_end_request_cur(pd_req, 0); pd_count = pd_req->current_nr_sectors; pd_buf = pd_req->buffer; spin_unlock_irqrestore(&pd_lock, saved_flags); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index bef3b997ba3..3871e3586d6 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -750,10 +750,10 @@ static int pf_ready(void) static struct request_queue *pf_queue; -static void pf_end_request(int uptodate) +static void pf_end_request(int err) { if (pf_req) { - end_request(pf_req, uptodate); + __blk_end_request_cur(pf_req, err); pf_req = NULL; } } @@ -773,7 +773,7 @@ repeat: pf_count = pf_req->current_nr_sectors; if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { - pf_end_request(0); + pf_end_request(-EIO); goto repeat; } @@ -788,7 +788,7 @@ repeat: pi_do_claimed(pf_current->pi, do_pf_write); else { pf_busy = 0; - pf_end_request(0); + pf_end_request(-EIO); goto repeat; } } @@ -805,7 +805,7 @@ static int pf_next_buf(void) return 1; if (!pf_count) { spin_lock_irqsave(&pf_spin_lock, saved_flags); - pf_end_request(1); + pf_end_request(0); pf_req = elv_next_request(pf_queue); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); if (!pf_req) @@ -816,12 +816,12 @@ static int pf_next_buf(void) return 0; } -static inline void next_request(int success) +static inline void next_request(int err) { unsigned long saved_flags; spin_lock_irqsave(&pf_spin_lock, saved_flags); - pf_end_request(success); + pf_end_request(err); pf_busy = 0; do_pf_request(pf_queue); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); @@ -844,7 +844,7 @@ static void do_pf_read_start(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(0); + next_request(-EIO); return; } pf_mask = STAT_DRQ; @@ -863,7 +863,7 @@ static void do_pf_read_drq(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(0); + next_request(-EIO); return; } pi_read_block(pf_current->pi, pf_buf, 512); @@ -871,7 +871,7 @@ static void do_pf_read_drq(void) break; } pi_disconnect(pf_current->pi); - next_request(1); + next_request(0); } static void do_pf_write(void) @@ -890,7 +890,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } @@ -903,7 +903,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } pi_write_block(pf_current->pi, pf_buf, 512); @@ -923,11 +923,11 @@ static void do_pf_write_done(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } pi_disconnect(pf_current->pi); - next_request(1); + next_request(0); } static int __init pf_init(void) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index bccc42bb921..d23b54bc2f5 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, __LINE__, op, res); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); return 0; } @@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); return 0; } @@ -205,7 +205,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, break; } else { blk_dump_rq_flags(req, DEVICE_NAME " bad request"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } diff --git a/drivers/block/swim.c b/drivers/block/swim.c index d22cc385693..6544a7b06bf 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -532,39 +532,39 @@ static void redo_fd_request(struct request_queue *q) fs = req->rq_disk->private_data; if (req->sector < 0 || req->sector >= fs->total_secs) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (req->current_nr_sectors == 0) { - end_request(req, 1); + __blk_end_request_cur(req, 0); continue; } if (!fs->disk_in) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rq_data_dir(req) == WRITE) { if (fs->write_protected) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } switch (rq_data_dir(req)) { case WRITE: /* NOT IMPLEMENTED */ - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; case READ: if (floppy_read_sectors(fs, req->sector, req->current_nr_sectors, req->buffer)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } req->nr_sectors -= req->current_nr_sectors; req->sector += req->current_nr_sectors; req->buffer += req->current_nr_sectors * 512; - end_request(req, 1); + __blk_end_request_cur(req, 0); break; } } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 612965307ba..5904f7b73c6 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -320,15 +320,15 @@ static void start_request(struct floppy_state *fs) #endif if (req->sector < 0 || req->sector >= fs->total_secs) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (req->current_nr_sectors == 0) { - end_request(req, 1); + __blk_end_request_cur(req, 0); continue; } if (fs->ejected) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } @@ -336,7 +336,7 @@ static void start_request(struct floppy_state *fs) if (fs->write_prot < 0) fs->write_prot = swim3_readbit(fs, WRITE_PROT); if (fs->write_prot) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } @@ -508,7 +508,7 @@ static void act(struct floppy_state *fs) case do_transfer: if (fs->cur_cyl != fs->req_cyl) { if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; return; } @@ -540,7 +540,7 @@ static void scan_timeout(unsigned long data) out_8(&sw->intr_enable, 0); fs->cur_cyl = -1; if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } else { @@ -559,7 +559,7 @@ static void seek_timeout(unsigned long data) out_8(&sw->select, RELAX); out_8(&sw->intr_enable, 0); printk(KERN_ERR "swim3: seek timeout\n"); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -583,7 +583,7 @@ static void settle_timeout(unsigned long data) return; } printk(KERN_ERR "swim3: seek settle timeout\n"); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -615,7 +615,7 @@ static void xfer_timeout(unsigned long data) fd_req->current_nr_sectors -= s; printk(KERN_ERR "swim3: timeout %sing sector %ld\n", (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -646,7 +646,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk(KERN_ERR "swim3: seen sector but cyl=ff?\n"); fs->cur_cyl = -1; if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } else { @@ -731,7 +731,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk("swim3: error %sing block %ld (err=%x)\n", rq_data_dir(fd_req) == WRITE? "writ": "read", (long)fd_req->sector, err); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; } } else { @@ -740,7 +740,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid); printk(KERN_ERR " state=%d, dir=%x, intr=%x, err=%x\n", fs->state, rq_data_dir(fd_req), intr, err); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); break; @@ -749,7 +749,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) fd_req->current_nr_sectors -= fs->scount; fd_req->buffer += fs->scount * 512; if (fd_req->current_nr_sectors <= 0) { - end_request(fd_req, 1); + __blk_end_request_cur(fd_req, 0); fs->state = idle; } else { fs->req_sector += fs->scount; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 64b496fce98..6f6ad82ec0c 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -314,21 +314,22 @@ static void do_xd_request (struct request_queue * q) int retry; if (!blk_fs_request(req)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (block + count > get_capacity(req->rq_disk)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rw != READ && rw != WRITE) { printk("do_xd_request: unknown request\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } for (retry = 0; (retry < XD_RETRIES) && !res; retry++) res = xd_readwrite(rw, disk, req->buffer, block, count); - end_request(req, res); /* wrap up, 0 = fail, 1 = success */ + /* wrap up, 0 = success, -errno = fail */ + __blk_end_request_cur(req, res); } } @@ -418,7 +419,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); xd_recalibrate(drive); spin_lock_irq(&xd_lock); - return (0); + return -EIO; case 2: if (sense[0] & 0x30) { printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); @@ -439,7 +440,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ else printk(" - no valid disk address\n"); spin_lock_irq(&xd_lock); - return (0); + return -EIO; } if (xd_dma_buffer) for (i=0; i < (temp * 0x200); i++) @@ -448,7 +449,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ count -= temp, buffer += temp * 0x200, block += temp; } spin_lock_irq(&xd_lock); - return (1); + return 0; } /* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index cd6cfe3b51e..b4564479f64 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -302,7 +302,7 @@ static void do_blkif_request(struct request_queue *rq) while ((req = elv_next_request(rq)) != NULL) { info = req->rq_disk->private_data; if (!blk_fs_request(req)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 4aecf5dc6a9..b1e1d7e5ab1 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -466,7 +466,7 @@ struct request *ace_get_next_request(struct request_queue * q) while ((req = elv_next_request(q)) != NULL) { if (blk_fs_request(req)) break; - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } return req; } @@ -494,7 +494,7 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Drop all pending requests */ while ((req = elv_next_request(ace->queue)) != NULL) - end_request(req, 0); + __blk_end_request_cur(req, -EIO); /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 80754cdd311..b66ad58a3c3 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -77,7 +77,7 @@ static void do_z2_request(struct request_queue *q) if (start + len > z2ram_size) { printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n", req->sector, req->current_nr_sectors); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } while (len) { @@ -93,7 +93,7 @@ static void do_z2_request(struct request_queue *q) start += size; len -= size; } - end_request(req, 1); + __blk_end_request_cur(req, 0); } } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index fee9a9e83fc..cab2b1fb2fe 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -654,17 +654,17 @@ static void gdrom_request(struct request_queue *rq) while ((req = elv_next_request(rq)) != NULL) { if (!blk_fs_request(req)) { printk(KERN_DEBUG "GDROM: Non-fs request ignored\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } if (rq_data_dir(req) != READ) { printk(KERN_NOTICE "GDROM: Read only device -"); printk(" write request ignored\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } if (req->nr_sectors) gdrom_request_handler_dma(req); else - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index a443e136dc4..221317e6a00 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -923,7 +923,7 @@ static void i2o_block_request_fn(struct request_queue *q) break; } } else - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index a49a9c8f2cb..76c4c8d1307 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -54,33 +54,33 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && req->cmd[0] == REQ_LB_OP_DISCARD) - return !tr->discard(dev, block, nsect); + return tr->discard(dev, block, nsect); if (!blk_fs_request(req)) - return 0; + return -EIO; if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk)) - return 0; + return -EIO; switch(rq_data_dir(req)) { case READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) - return 0; - return 1; + return -EIO; + return 0; case WRITE: if (!tr->writesect) - return 0; + return -EIO; for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) - return 0; - return 1; + return -EIO; + return 0; default: printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req)); - return 0; + return -EIO; } } @@ -96,7 +96,7 @@ static int mtd_blktrans_thread(void *arg) while (!kthread_should_stop()) { struct request *req; struct mtd_blktrans_dev *dev; - int res = 0; + int res; req = elv_next_request(rq); @@ -119,7 +119,7 @@ static int mtd_blktrans_thread(void *arg) spin_lock_irq(rq->queue_lock); - end_request(req, res); + __blk_end_request_cur(req, res); } spin_unlock_irq(rq->queue_lock); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index a85ad05e854..09617884a50 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -192,25 +192,25 @@ static void jsfd_do_request(struct request_queue *q) size_t len = req->current_nr_sectors << 9; if ((offset + len) > jdp->dsize) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rq_data_dir(req) != READ) { printk(KERN_ERR "jsfd: write\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if ((jdp->dbase & 0xff000000) != 0x20000000) { printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } jsfd_read(req->buffer, jdp->dbase + offset, len); - end_request(req, 1); + __blk_end_request_cur(req, 0); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e33c8356b3d..cfeb3c2feb2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,9 +845,8 @@ extern unsigned int blk_rq_cur_bytes(struct request *rq); * blk_update_request() completes given number of bytes and updates * the request without completing it. * - * blk_end_request() and friends. __blk_end_request() and - * end_request() must be called with the request queue spinlock - * acquired. + * blk_end_request() and friends. __blk_end_request() must be called + * with the request queue spinlock acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. @@ -898,6 +897,19 @@ static inline void blk_end_request_all(struct request *rq, int error) BUG_ON(pending); } +/** + * blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error + * + * Description: + * Complete the current consecutively mapped chunk from @rq. + */ +static inline void blk_end_request_cur(struct request *rq, int error) +{ + blk_end_request(rq, error, rq->hard_cur_sectors << 9); +} + /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed @@ -934,29 +946,17 @@ static inline void __blk_end_request_all(struct request *rq, int error) } /** - * end_request - end I/O on the current segment of the request - * @rq: the request being processed - * @uptodate: error value or %0/%1 uptodate flag + * __blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error * * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. - **/ -static inline void end_request(struct request *rq, int uptodate) + * Complete the current consecutively mapped chunk from @rq. Must + * be called with queue lock held. + */ +static inline void __blk_end_request_cur(struct request *rq, int error) { - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); + __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v1.2.3-70-g09d2 From 731ec497e5888c6792ad62613ae9be97eebcd7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:20 +0900 Subject: block: kill rq->data Now that all block request data transfer is done via bio, rq->data isn't used. Kill it. While at it, make the roles of rq->special and buffer clear. [ Impact: drop now unncessary field from struct request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh --- block/blk-core.c | 5 ++--- block/blk-map.c | 6 +++--- block/scsi_ioctl.c | 1 - drivers/scsi/scsi_lib.c | 1 - include/linux/blkdev.h | 5 ++--- 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 0520cc70458..6dd180cf15d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -189,10 +189,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg) (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", + printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", rq->bio, rq->biotail, - rq->buffer, rq->data, - rq->data_len); + rq->buffer, rq->data_len); if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); diff --git a/block/blk-map.c b/block/blk-map.c index f103729b462..694fefad34e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -156,7 +156,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (!bio_flagged(bio, BIO_USER_MAPPED)) rq->cmd_flags |= REQ_COPY_USER; - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; unmap_rq: blk_rq_unmap_user(bio); @@ -235,7 +235,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, blk_queue_bounce(q, &bio); bio_get(bio); blk_rq_bio_prep(q, rq, bio); - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_user_iov); @@ -313,7 +313,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, blk_rq_bio_prep(q, rq, bio); blk_queue_bounce(q, &rq->bio); - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 82a0ca2f672..bb9aa43551b 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -500,7 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->data = NULL; rq->data_len = 0; rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 756ac7c93de..aa9fc572e45 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1088,7 +1088,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) return ret; } else { BUG_ON(req->data_len); - BUG_ON(req->data); memset(&cmd->sdb, 0, sizeof(cmd->sdb)); req->buffer = NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfeb3c2feb2..12c545e2737 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -211,8 +211,8 @@ struct request { unsigned short ioprio; - void *special; - char *buffer; + void *special; /* opaque pointer available for LLD use */ + char *buffer; /* kaddr of the current segment if available */ int tag; int errors; @@ -229,7 +229,6 @@ struct request { unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; - void *data; void *sense; unsigned long deadline; -- cgit v1.2.3-70-g09d2 From 9fd8d0e1bcb848257968d9a7d73ca4d890ea8bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:04 +0900 Subject: block: make blk_end_request_cur() return bool In the process of mindlessly copying [__]blk_end_request_all(), [__]blk_end_request_cur() ended up returning void even though they're partial completion functions. Fix it. [ Impact: fix braindead API ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12c545e2737..3a5b1bd6582 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -903,10 +903,14 @@ static inline void blk_end_request_all(struct request *rq, int error) * * Description: * Complete the current consecutively mapped chunk from @rq. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void blk_end_request_cur(struct request *rq, int error) +static inline bool blk_end_request_cur(struct request *rq, int error) { - blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, rq->hard_cur_sectors << 9); } /** @@ -952,10 +956,14 @@ static inline void __blk_end_request_all(struct request *rq, int error) * Description: * Complete the current consecutively mapped chunk from @rq. Must * be called with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void __blk_end_request_cur(struct request *rq, int error) +static inline bool __blk_end_request_cur(struct request *rq, int error) { - __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v1.2.3-70-g09d2 From eec9462088a26c046d4db3100796a340a50890b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:14 +0900 Subject: mg_disk: fold mg_disk.h into mg_disk.c include/linux/mg_disk.h is used only by drivers/block/mg_disk.c. No reason to put it in a separate header. Fold it into mg_disk.c. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: unsik Kim Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 186 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mg_disk.h | 206 ------------------------------------------------ 2 files changed, 185 insertions(+), 207 deletions(-) delete mode 100644 include/linux/mg_disk.h (limited to 'include') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 2c00ad90cd6..2c6127ee834 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -22,10 +22,194 @@ #include #include #include -#include #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1) +/* name for block device */ +#define MG_DISK_NAME "mgd" +/* name for platform device */ +#define MG_DEV_NAME "mg_disk" + +#define MG_DISK_MAJ 0 +#define MG_DISK_MAX_PART 16 +#define MG_SECTOR_SIZE 512 +#define MG_MAX_SECTS 256 + +/* Register offsets */ +#define MG_BUFF_OFFSET 0x8000 +#define MG_STORAGE_BUFFER_SIZE 0x200 +#define MG_REG_OFFSET 0xC000 +#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ +#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ +#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) +#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) +#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) +#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) +#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) +#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ +#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ +#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) +#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) + +/* "Drive Select/Head Register" bit values */ +#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ +#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) +#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) +#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) + + +/* "Device Control Register" bit values */ +#define MG_REG_CTRL_INTR_ENABLE 0x0 +#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) +#define MG_REG_CTRL_RESET (0x1<<2) +#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 +#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) +#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 +#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) +#define MG_REG_CTRL_DPD_DISABLE 0x0 +#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) + +/* Status register bit */ +/* error bit in status register */ +#define MG_REG_STATUS_BIT_ERROR 0x01 +/* corrected error in status register */ +#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 +/* data request bit in status register */ +#define MG_REG_STATUS_BIT_DATA_REQ 0x08 +/* DSC - Drive Seek Complete */ +#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 +/* DWF - Drive Write Fault */ +#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 +#define MG_REG_STATUS_BIT_READY 0x40 +#define MG_REG_STATUS_BIT_BUSY 0x80 + +/* handy status */ +#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) +#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ + (MG_REG_STATUS_BIT_BUSY | \ + MG_REG_STATUS_BIT_WRITE_FAULT | \ + MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) + +/* Error register */ +#define MG_REG_ERR_AMNF 0x01 +#define MG_REG_ERR_ABRT 0x04 +#define MG_REG_ERR_IDNF 0x10 +#define MG_REG_ERR_UNC 0x40 +#define MG_REG_ERR_BBK 0x80 + +/* error code for others */ +#define MG_ERR_NONE 0 +#define MG_ERR_TIMEOUT 0x100 +#define MG_ERR_INIT_STAT 0x101 +#define MG_ERR_TRANSLATION 0x102 +#define MG_ERR_CTRL_RST 0x103 +#define MG_ERR_INV_STAT 0x104 +#define MG_ERR_RSTOUT 0x105 + +#define MG_MAX_ERRORS 6 /* Max read/write errors */ + +/* command */ +#define MG_CMD_RD 0x20 +#define MG_CMD_WR 0x30 +#define MG_CMD_SLEEP 0x99 +#define MG_CMD_WAKEUP 0xC3 +#define MG_CMD_ID 0xEC +#define MG_CMD_WR_CONF 0x3C +#define MG_CMD_RD_CONF 0x40 + +/* operation mode */ +#define MG_OP_CASCADE (1 << 0) +#define MG_OP_CASCADE_SYNC_RD (1 << 1) +#define MG_OP_CASCADE_SYNC_WR (1 << 2) +#define MG_OP_INTERLEAVE (1 << 3) + +/* synchronous */ +#define MG_BURST_LAT_4 (3 << 4) +#define MG_BURST_LAT_5 (4 << 4) +#define MG_BURST_LAT_6 (5 << 4) +#define MG_BURST_LAT_7 (6 << 4) +#define MG_BURST_LAT_8 (7 << 4) +#define MG_BURST_LEN_4 (1 << 1) +#define MG_BURST_LEN_8 (2 << 1) +#define MG_BURST_LEN_16 (3 << 1) +#define MG_BURST_LEN_32 (4 << 1) +#define MG_BURST_LEN_CONT (0 << 1) + +/* timeout value (unit: ms) */ +#define MG_TMAX_CONF_TO_CMD 1 +#define MG_TMAX_WAIT_RD_DRQ 10 +#define MG_TMAX_WAIT_WR_DRQ 500 +#define MG_TMAX_RST_TO_BUSY 10 +#define MG_TMAX_HDRST_TO_RDY 500 +#define MG_TMAX_SWRST_TO_RDY 500 +#define MG_TMAX_RSTOUT 3000 + +/* device attribution */ +/* use mflash as boot device */ +#define MG_BOOT_DEV (1 << 0) +/* use mflash as storage device */ +#define MG_STORAGE_DEV (1 << 1) +/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ +#define MG_STORAGE_DEV_SKIP_RST (1 << 2) + +#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) + +/* names of GPIO resource */ +#define MG_RST_PIN "mg_rst" +/* except MG_BOOT_DEV, reset-out pin should be assigned */ +#define MG_RSTOUT_PIN "mg_rstout" + +/* private driver data */ +struct mg_drv_data { + /* disk resource */ + u32 use_polling; + + /* device attribution */ + u32 dev_attr; + + /* internally used */ + struct mg_host *host; +}; + +/* main structure for mflash driver */ +struct mg_host { + struct device *dev; + + struct request_queue *breq; + spinlock_t lock; + struct gendisk *gd; + + struct timer_list timer; + void (*mg_do_intr) (struct mg_host *); + + u16 id[ATA_ID_WORDS]; + + u16 cyls; + u16 heads; + u16 sectors; + u32 n_sectors; + u32 nres_sectors; + + void __iomem *dev_base; + unsigned int irq; + unsigned int rst; + unsigned int rstout; + + u32 major; + u32 error; +}; + +/* + * Debugging macro and defines + */ +#undef DO_MG_DEBUG +#ifdef DO_MG_DEBUG +# define MG_DBG(fmt, args...) \ + printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) +#else /* CONFIG_MG_DEBUG */ +# define MG_DBG(fmt, args...) do { } while (0) +#endif /* CONFIG_MG_DEBUG */ + static void mg_request(struct request_queue *); static void mg_dump_status(const char *msg, unsigned int stat, diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index 1f76b1ebf62..00000000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. - * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -#include -#include - -/* name for block device */ -#define MG_DISK_NAME "mgd" -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_STORAGE_BUFFER_SIZE 0x200 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* "Drive Select/Head Register" bit values */ -#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ -#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) - - -/* "Device Control Register" bit values */ -#define MG_REG_CTRL_INTR_ENABLE 0x0 -#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) -#define MG_REG_CTRL_RESET (0x1<<2) -#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 -#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) -#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 -#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) -#define MG_REG_CTRL_DPD_DISABLE 0x0 -#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) - -/* Status register bit */ -/* error bit in status register */ -#define MG_REG_STATUS_BIT_ERROR 0x01 -/* corrected error in status register */ -#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 -/* data request bit in status register */ -#define MG_REG_STATUS_BIT_DATA_REQ 0x08 -/* DSC - Drive Seek Complete */ -#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 -/* DWF - Drive Write Fault */ -#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 -#define MG_REG_STATUS_BIT_READY 0x40 -#define MG_REG_STATUS_BIT_BUSY 0x80 - -/* handy status */ -#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ - (MG_REG_STATUS_BIT_BUSY | \ - MG_REG_STATUS_BIT_WRITE_FAULT | \ - MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) - -/* Error register */ -#define MG_REG_ERR_AMNF 0x01 -#define MG_REG_ERR_ABRT 0x04 -#define MG_REG_ERR_IDNF 0x10 -#define MG_REG_ERR_UNC 0x40 -#define MG_REG_ERR_BBK 0x80 - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - struct mg_host *host; -}; - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -#endif -- cgit v1.2.3-70-g09d2 From 833cc67c7722e35863c6aaee9df56b442ef957ae Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 27 Apr 2009 21:32:16 +0000 Subject: smsc911x: add fifo byteswap support V2 This is V2 of the smsc911x fifo byteswap patch. The smsc911x hardware supports both big and little and endian hardware configurations, and the linux smsc911x driver currently detects word order. For correct operation on big endian platforms lacking swapped byte lanes the following patch is needed. Only fifo data is swapped, register data does not require any swapping. Signed-off-by: Magnus Damm Acked-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 13 +++++++++++++ include/linux/smsc911x.h | 10 ++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index eb7db032a78..5113b26fc2d 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include "smsc911x.h" @@ -175,6 +176,12 @@ static inline void smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf, unsigned int wordcount) { + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + smsc911x_reg_write(pdata, TX_DATA_FIFO, swab32(*buf++)); + return; + } + if (pdata->config.flags & SMSC911X_USE_32BIT) { writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount); return; @@ -194,6 +201,12 @@ static inline void smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, unsigned int wordcount) { + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + *buf++ = swab32(smsc911x_reg_read(pdata, RX_DATA_FIFO)); + return; + } + if (pdata->config.flags & SMSC911X_USE_32BIT) { readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount); return; diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index b32725075d7..5241e4fb4ec 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -47,4 +47,14 @@ struct smsc911x_platform_config { #define SMSC911X_FORCE_EXTERNAL_PHY (BIT(3)) #define SMSC911X_SAVE_MAC_ADDRESS (BIT(4)) +/* + * SMSC911X_SWAP_FIFO: + * Enables software byte swap for fifo data. Should only be used as a + * "last resort" in the case of big endian mode on boards with incorrectly + * routed data bus to older devices such as LAN9118. Newer devices such as + * LAN9221 can handle this in hardware, there are registers to control + * this swapping but the driver doesn't currently use them. + */ +#define SMSC911X_SWAP_FIFO (BIT(5)) + #endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.2.3-70-g09d2 From 8dc92f7e2ecfd93f5c57da78594a7a5482e2c15e Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 27 Apr 2009 22:35:52 +0000 Subject: sctp: add feature bit for SCTP offload in hardware this is the sctp code to enable hardware crc32c offload for adapters that support it. Originally by: Vlad Yasevich modified by Jesse Brandeburg Signed-off-by: Jesse Brandeburg Signed-off-by: Vlad Yasevich Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/sctp/output.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fe20d171acf..f8c3619d551 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -670,7 +670,9 @@ struct net_device #define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ +/* the GSO_MASK reserves bits 16 through 23 */ #define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ +#define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 diff --git a/net/sctp/output.c b/net/sctp/output.c index 7d08f522ec8..f0c91df59d4 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -412,6 +412,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); + skb_reset_transport_header(nskb); sh->source = htons(packet->source_port); sh->dest = htons(packet->destination_port); @@ -527,15 +528,25 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: Adler-32 is no longer applicable, as has been replaced * by CRC32-C as described in . */ - if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) { + if (!sctp_checksum_disable && + !(dst->dev->features & (NETIF_F_NO_CSUM | NETIF_F_SCTP_CSUM))) { __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. */ sh->checksum = sctp_end_cksum(crc32); - } else - nskb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + if (dst->dev->features & NETIF_F_SCTP_CSUM) { + /* no need to seed psuedo checksum for SCTP */ + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (skb_transport_header(nskb) - + nskb->head); + nskb->csum_offset = offsetof(struct sctphdr, checksum); + } else { + nskb->ip_summed = CHECKSUM_UNNECESSARY; + } + } /* IP layer ECN support * From RFC 2481 -- cgit v1.2.3-70-g09d2 From 9ec4fa271faf2db3b8e1419c998da1ca6b094eb6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:57:18 -0700 Subject: irq, cpumask: correct CPUMASKS_OFFSTACK typo and fix fallout CPUMASKS_OFFSTACK is not defined anywhere (it is CPUMASK_OFFSTACK). It is a typo and init_allocate_desc_masks() is called before it set affinity to all cpus... Split init_alloc_desc_masks() into all_desc_masks() and init_desc_masks(). Also use CPUMASK_OFFSTACK in alloc_desc_masks(). [ Impact: fix smp_affinity copying/setup when moving irq_desc between CPUs ] Signed-off-by: Yinghai Lu Acked-by: Rusty Russell Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" LKML-Reference: <49F6546E.3040406@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 27 ++++++++++++++++++--------- kernel/irq/handle.c | 9 ++++++--- kernel/irq/numa_migrate.c | 2 +- 3 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index b7cbeed972e..c4953cf27e5 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -424,27 +424,25 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #ifdef CONFIG_SMP /** - * init_alloc_desc_masks - allocate cpumasks for irq_desc + * alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). - * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK int node; if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ alloc_bootmem_cpumask_var(&desc->pending_mask); - cpumask_clear(desc->pending_mask); #endif return true; } @@ -453,18 +451,25 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { free_cpumask_var(desc->affinity); return false; } - cpumask_clear(desc->pending_mask); +#endif #endif return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +} + /** * init_copy_desc_masks - copy cpumasks for irq_desc * @old_desc: pointer to old irq_desc struct @@ -478,7 +483,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -#ifdef CONFIG_CPUMASKS_OFFSTACK +#ifdef CONFIG_CPUMASK_OFFSTACK cpumask_copy(new_desc->affinity, old_desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -499,12 +504,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ +} + static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142be8dd..882c7980010 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -115,10 +115,11 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } + init_desc_masks(desc); arch_init_chip_data(desc, cpu); } @@ -169,7 +170,8 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); irq_desc_ptrs[i] = desc + i; } @@ -256,7 +258,8 @@ int __init early_irq_init(void) for (i = 0; i < count; i++) { desc[i].irq = i; - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); desc[i].kstat_irqs = kstat_irqs_all[i]; } return arch_early_irq_init(); diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 44bbdcbaf8d..5760d725162 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -37,7 +37,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " "for migration.\n", irq); return false; -- cgit v1.2.3-70-g09d2 From fcef5911c7ea89b80d5bfc727f402f37c9eefd57 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:58:23 -0700 Subject: x86/irq: remove leftover code from NUMA_MIGRATE_IRQ_DESC The original feature of migrating irq_desc dynamic was too fragile and was causing problems: it caused crashes on systems with lots of cards with MSI-X when user-space irq-balancer was enabled. We now have new patches that create irq_desc according to device numa node. This patch removes the leftover bits of the dynamic balancer. [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F654AF.8000808@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 ------- arch/x86/configs/x86_64_defconfig | 1 - arch/x86/kernel/apic/io_apic.c | 56 +++------------------------------------ include/linux/irq.h | 10 ------- kernel/irq/Makefile | 2 +- kernel/irq/chip.c | 12 ++------- kernel/irq/handle.c | 9 ++----- kernel/irq/numa_migrate.c | 2 -- 8 files changed, 9 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee1..e1b2543f8ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,16 +274,6 @@ config SPARSE_IRQ If you don't know what to do here, say N. -config NUMA_MIGRATE_IRQ_DESC - bool "Move irq desc when changing irq smp_affinity" - depends on SPARSE_IRQ && NUMA - depends on BROKEN - default n - ---help--- - This enables moving irq_desc to cpu/node that irq will use handled. - - If you don't know what to do here, say N. - config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9fe5d212ab4..27b8ce0f590 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -195,7 +195,6 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_SPARSE_IRQ=y -# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 30da617d18e..9fbf0f7ec7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -148,9 +148,6 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -254,8 +251,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) return 0; } -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - +/* for move_irq_desc */ static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) { @@ -356,19 +352,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->chip_data = NULL; } } - -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; - - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(desc->affinity, mask)) - cfg->move_desc_pending = 1; - } -} -#endif +/* end for move_irq_desc */ #else static struct irq_cfg *irq_cfg(unsigned int irq) @@ -378,13 +362,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ -} -#endif - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -592,9 +569,6 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - cpumask_copy(desc->affinity, mask); return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); @@ -2393,8 +2367,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, mask); - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); irte.vector = cfg->vector; @@ -2491,34 +2463,14 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain has not changed, but affinity did */ - me = smp_processor_id(); - if (cpumask_test_cpu(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); - } } #else static inline void irq_complete_move(struct irq_desc **descp) {} diff --git a/include/linux/irq.h b/include/linux/irq.h index c4953cf27e5..2a34cd6281d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,16 +212,6 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); -#else - return desc; -#endif -} - /* * Migration helpers for obsolete names, they will go away: */ diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 3394f8f5296..2f065277f8e 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o -obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o +obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c687ba4363f..13c68e71b72 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); @@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) /* Start handling the irq */ if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) { + if (desc->chip->eoi) desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); - } } void @@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) { + if (desc->chip != &no_irq_chip) mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); - } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 882c7980010..3e0cbc44bd7 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -458,11 +458,8 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - /* get new one */ - desc = irq_remap_to_desc(irq, desc); - } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -473,10 +470,8 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); - } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 5760d725162..ce72bc3f4ce 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -97,9 +97,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, /* free the old one */ free_one_irq_desc(old_desc, desc); - spin_unlock(&old_desc->lock); kfree(old_desc); - spin_lock(&desc->lock); return desc; -- cgit v1.2.3-70-g09d2 From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:59:21 -0700 Subject: irq: change ->set_affinity() to return status according to Ingo, change set_affinity() in irq_chip should return int, because that way we can handle failure cases in a much cleaner way, in the genirq layer. v2: fix two typos [ Impact: extend API ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: linux-arch@vger.kernel.org LKML-Reference: <49F654E9.4070809@kernel.org> Signed-off-by: Ingo Molnar --- arch/alpha/kernel/sys_dp264.c | 8 +++-- arch/alpha/kernel/sys_titan.c | 4 ++- arch/arm/common/gic.c | 4 ++- arch/cris/arch-v32/kernel/irq.c | 4 ++- arch/ia64/hp/sim/hpsim_irq.c | 3 +- arch/ia64/kernel/iosapic.c | 10 +++--- arch/ia64/kernel/msi_ia64.c | 16 +++++---- arch/ia64/sn/kernel/irq.c | 4 ++- arch/ia64/sn/kernel/msi_sn.c | 8 +++-- arch/mips/cavium-octeon/octeon-irq.c | 8 +++-- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/irq-gic.c | 5 +-- arch/mips/mti-malta/malta-smtc.c | 4 ++- arch/mips/sibyte/bcm1480/irq.c | 8 +++-- arch/mips/sibyte/sb1250/irq.c | 8 +++-- arch/parisc/kernel/irq.c | 6 ++-- arch/powerpc/platforms/pseries/xics.c | 12 ++++--- arch/powerpc/sysdev/mpic.c | 4 ++- arch/sparc/kernel/irq_64.c | 12 +++++-- arch/x86/kernel/apic/io_apic.c | 64 ++++++++++++++++++++++------------- drivers/parisc/iosapic.c | 6 ++-- drivers/xen/events.c | 12 ++++--- include/linux/irq.h | 2 +- 23 files changed, 140 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 9c9d1fd4155..5bd5259324b 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } } -static void +static int dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } -static void +static int clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq - 16, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } static struct hw_interrupt_type dp264_irq_type = { diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 27f840a4ad3..8dd239ebdb9 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } -static void +static int titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&titan_irq_lock); titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); spin_unlock(&titan_irq_lock); + + return 0; } static void diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index c6884ba1d5e..90f6b7f52d4 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) +static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) { void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); unsigned int shift = (irq % 4) * 8; @@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) val |= 1 << (cpu + shift); writel(val, reg); spin_unlock(&irq_controller_lock); + + return 0; } #endif diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index df3925cb1c7..d70b445f4a8 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq) { } -void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) +int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) { unsigned long flags; spin_lock_irqsave(&irq_lock, flags); irq_allocations[irq - FIRST_IRQ].mask = *dest; spin_unlock_irqrestore(&irq_lock, flags); + + return 0; } static struct irq_chip crisv32_irq_type = { diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index cc0a3182db3..acb5047ab57 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq) { } -static void +static int hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) { + return 0; } static struct hw_interrupt_type irq_type_hp_sim = { diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 166e0d839fa..f92cef47bf8 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -329,7 +329,7 @@ unmask_irq (unsigned int irq) } -static void +static int iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) { #ifdef CONFIG_SMP @@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) cpu = cpumask_first_and(cpu_online_mask, mask); if (cpu >= nr_cpu_ids) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt */ + return -1; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } + #endif + return 0; } /* diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 2b15e233f7f..0f8ade9331b 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -12,7 +12,7 @@ static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, +static int ia64_set_msi_irq_affinity(unsigned int irq, const cpumask_t *cpu_mask) { struct msi_msg msg; @@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, int cpu = first_cpu(*cpu_mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; read_msi_msg(irq, &msg); @@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); + + return 0; } #endif /* CONFIG_SMP */ @@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; int cpu = cpumask_first(mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dmar_msi_read(irq, &msg); @@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dmar_msi_write(irq, &msg); cpumask_copy(irq_desc[irq].affinity, mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 66fd705e82c..764f26abac0 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -227,7 +227,7 @@ finish_up: return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) +static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; @@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) (void)sn_retarget_vector(sn_irq_info, nasid, slice); + + return 0; } #ifdef CONFIG_SMP diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 81e428943d7..fbbfb970120 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, +static int sn_set_msi_irq_affinity(unsigned int irq, const struct cpumask *cpu_mask) { struct msi_msg msg; @@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpu = cpumask_first(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) - return; + return -1; /* * Release XIO resources for the old MSI PCI address @@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); sn_msi_info[irq].sn_irq_info = new_irq_info; if (new_irq_info == NULL) - return; + return -1; /* * Map the xio address into bus space @@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpu_mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 1c19af8daa6..d3a0c8154be 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ @@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); write_unlock(&octeon_irq_ciu0_rwlock); + + return 0; } #endif @@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ @@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); write_unlock(&octeon_irq_ciu1_rwlock); + + return 0; } #endif diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 3214ade02d1..4f1eed107b0 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include -extern void plat_set_irq_affinity(unsigned int irq, +extern int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity); extern void smtc_forward_irq(unsigned int irq); diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 87deb8f6c45..3f43c2e3aa5 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) - return; + return -1; /* Assumption : cpumask refers to a single CPU */ spin_lock_irqsave(&gic_lock, flags); @@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); + return 0; } #endif diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 5ba31888fef..499ffe5475d 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) +int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { cpumask_t tmask; int cpu = 0; @@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) /* Do any generic SMTC IRQ affinity setup */ smtc_set_irq_affinity(irq, tmask); + + return 0; } #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */ diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index 352352b3cb2..4f256a131bf 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -119,7 +119,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } i = cpumask_first(mask); @@ -155,6 +155,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) } spin_unlock(&bcm1480_imr_lock); spin_unlock_irqrestore(&desc->lock, flags); + + return 0; } #endif diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index c08ff582da6..e389507f1f9 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; @@ -114,7 +114,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } /* Convert logical CPU to physical CPU */ @@ -146,6 +146,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) } spin_unlock(&sb1250_imr_lock); spin_unlock_irqrestore(&desc->lock, flags); + + return 0; } #endif diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 4ea4229d765..8007f1e6572 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest) return cpu_dest; } -static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) +static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { int cpu_dest; cpu_dest = cpu_check_affinity(irq, dest); if (cpu_dest < 0) - return; + return -1; cpumask_copy(&irq_desc[irq].affinity, dest); + + return 0; } #endif diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 80b513449f4..be3581a8c29 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) irq = (unsigned int)irq_map[virq].hwirq; if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; + return -1; status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } /* @@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) printk(KERN_WARNING "%s: No online cpus in the mask %s for irq %d\n", __func__, cpulist, virq); - return; + return -1; } status = rtas_call(ibm_set_xive, 3, 1, NULL, @@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } + + return 0; } static struct irq_chip xics_pic_direct = { diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 21b95670159..f4cbd15cf22 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); } + + return 0; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 5deabe921a4..e5e78f9cfc9 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, +static int sun4u_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { sun4u_irq_enable(virt_irq); + + return 0; } /* Don't do anything. The desc->status check for IRQ_DISABLED in @@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, +static int sun4v_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; @@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq, if (err != HV_EOK) printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " "err(%d)\n", ino, cpuid, err); + + return 0; } static void sun4v_irq_disable(unsigned int virt_irq) @@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, +static int sun4v_virt_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; @@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq, printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " "err(%d)\n", dev_handle, dev_ino, cpuid, err); + + return 0; } static void sun4v_virq_disable(unsigned int virt_irq) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9fbf0f7ec7e..5c7630b40a5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -574,13 +574,14 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); } -static void +static int set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; unsigned int irq; + int ret = -1; irq = desc->irq; cfg = desc->chip_data; @@ -591,18 +592,21 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); + ret = 0; } spin_unlock_irqrestore(&ioapic_lock, flags); + + return ret; } -static void +static int set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc; desc = irq_to_desc(irq); - set_ioapic_affinity_irq_desc(desc, mask); + return set_ioapic_affinity_irq_desc(desc, mask); } #endif /* CONFIG_SMP */ @@ -2348,24 +2352,25 @@ static int ioapic_retrigger_irq(unsigned int irq) * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. */ -static void +static int migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; unsigned int dest; unsigned int irq; + int ret = -1; if (!cpumask_intersects(mask, cpu_online_mask)) - return; + return ret; irq = desc->irq; if (get_irte(irq, &irte)) - return; + return ret; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; + return ret; dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); @@ -2381,27 +2386,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) send_cleanup_vector(cfg); cpumask_copy(desc->affinity, mask); + + return 0; } /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - migrate_ioapic_irq_desc(desc, mask); + return migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, +static int set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + return set_ir_ioapic_affinity_irq_desc(desc, mask); } #else -static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { + return 0; } #endif @@ -3318,7 +3326,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3327,7 +3335,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3339,13 +3347,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + + return 0; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void +static int ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -3354,11 +3364,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irte irte; if (get_irte(irq, &irte)) - return; + return -1; dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3375,6 +3385,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) */ if (cfg->move_in_progress) send_cleanup_vector(cfg); + + return 0; } #endif @@ -3528,7 +3540,7 @@ void arch_teardown_msi_irq(unsigned int irq) #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3537,7 +3549,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3549,6 +3561,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3582,7 +3596,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3591,7 +3605,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3603,6 +3617,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3659,7 +3675,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3667,11 +3683,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; target_ht_irq(irq, dest, cfg->vector); + + return 0; } #endif diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 73348c4047e..4a9cc92d4d1 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, +static int iosapic_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { struct vector_info *vi = iosapic_get_vector(irq); @@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq, dest_cpu = cpu_check_affinity(irq, dest); if (dest_cpu < 0) - return; + return -1; cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu)); vi->txn_addr = txn_affinity_addr(irq, dest_cpu); @@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq, iosapic_set_irt_data(vi, &dummy_d0, &d1); iosapic_wr_irt_entry(vi, d0, d1); spin_unlock_irqrestore(&iosapic_lock, flags); + + return 0; } #endif diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af5dba..33389880279 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -688,13 +688,13 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); if (!VALID_EVTCHN(evtchn)) - return; + return -1; /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; @@ -707,13 +707,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); -} + return 0; +} -static void set_affinity_irq(unsigned irq, const struct cpumask *dest) +static int set_affinity_irq(unsigned irq, const struct cpumask *dest) { unsigned tcpu = cpumask_first(dest); - rebind_irq_to_cpu(irq, tcpu); + + return rebind_irq_to_cpu(irq, tcpu); } int resend_irq_on_evtchn(unsigned int irq) diff --git a/include/linux/irq.h b/include/linux/irq.h index 2a34cd6281d..8e4c18b2915 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -117,7 +117,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, + int (*set_affinity)(unsigned int irq, const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); -- cgit v1.2.3-70-g09d2 From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:00:38 -0700 Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu This simplifies the node awareness of the code. All our allocators only deal with a NUMA node ID locality not with CPU ids anyway - so there's no need to maintain (and transform) a CPU id all across the IRq layer. v2: keep move_irq_desc related [ Impact: cleanup, prepare IRQ code to be NUMA-aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Jeremy Fitzhardinge LKML-Reference: <49F65536.2020300@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 58 +++++++++++++++++++----------------------- arch/x86/lguest/boot.c | 2 +- drivers/pci/intr_remapping.c | 15 +++++------ drivers/xen/events.c | 2 +- include/linux/interrupt.h | 2 +- include/linux/irq.h | 16 +++++------- kernel/irq/handle.c | 28 ++++++++------------ kernel/irq/internals.h | 2 +- kernel/irq/numa_migrate.c | 36 +++++++++----------------- kernel/softirq.c | 2 +- 10 files changed, 66 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5c7630b40a5..560b887ba27 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -129,12 +129,9 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +static struct irq_pin_list *get_one_free_irq_2_pin(int node) { struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); @@ -209,12 +206,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) return cfg; } -static struct irq_cfg *get_one_free_irq_cfg(int cpu) +static struct irq_cfg *get_one_free_irq_cfg(int node) { struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { @@ -235,13 +229,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) return cfg; } -int arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int node) { struct irq_cfg *cfg; cfg = desc->chip_data; if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); + desc->chip_data = get_one_free_irq_cfg(node); if (!desc->chip_data) { printk(KERN_ERR "can not alloc irq_cfg\n"); BUG_ON(1); @@ -253,7 +247,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) /* for move_irq_desc */ static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) { struct irq_pin_list *old_entry, *head, *tail, *entry; @@ -262,7 +256,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) if (!old_entry) return; - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) return; @@ -272,7 +266,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) tail = entry; old_entry = old_entry->next; while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { entry = head; while (entry) { @@ -312,12 +306,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) } void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { struct irq_cfg *cfg; struct irq_cfg *old_cfg; - cfg = get_one_free_irq_cfg(cpu); + cfg = get_one_free_irq_cfg(node); if (!cfg) return; @@ -328,7 +322,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); - init_copy_irq_2_pin(old_cfg, cfg, cpu); + init_copy_irq_2_pin(old_cfg, cfg, node); } static void free_irq_cfg(struct irq_cfg *old_cfg) @@ -615,13 +609,13 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list *entry; entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", apic, pin); @@ -641,7 +635,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); + entry->next = get_one_free_irq_2_pin(node); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -650,7 +644,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { @@ -670,7 +664,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + add_pin_to_irq_node(cfg, node, newapic, newpin); } static inline void io_apic_modify_irq(struct irq_cfg *cfg, @@ -1612,7 +1606,7 @@ static void __init setup_IO_APIC_irqs(void) int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1647,13 +1641,13 @@ static void __init setup_IO_APIC_irqs(void) apic->multi_timer_check(apic_id, irq)) continue; - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); continue; } cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); + add_pin_to_irq_node(cfg, node, apic_id, pin); setup_IO_APIC_irq(apic_id, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); @@ -2863,7 +2857,7 @@ static inline void __init check_timer(void) { struct irq_desc *desc = irq_to_desc(0); struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2929,7 +2923,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq_node(cfg, node, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } else { /* for edge trigger, setup_IO_APIC_irq already @@ -2966,7 +2960,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); enable_8259A_irq(0); if (timer_irq_works()) { @@ -3185,7 +3179,7 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); struct irq_desc *desc_new = NULL; irq = 0; @@ -3194,7 +3188,7 @@ unsigned int create_irq_nr(unsigned int irq_want) spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - desc_new = irq_to_desc_alloc_cpu(new, cpu); + desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; @@ -3968,7 +3962,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p { struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", @@ -3976,7 +3970,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p return -EINVAL; } - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc %d\n", irq); return 0; @@ -3987,7 +3981,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); + add_pin_to_irq_node(cfg, node, ioapic, pin); } setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ca7ec44bafc..45acbcf2568 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -636,7 +636,7 @@ static void __init lguest_init_IRQ(void) void lguest_setup_irq(unsigned int irq) { - irq_to_desc_alloc_cpu(irq, 0); + irq_to_desc_alloc_node(irq, 0); set_irq_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6..9eff36a293e 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -23,15 +23,12 @@ struct irq_2_iommu { }; #ifdef CONFIG_GENERIC_HARDIRQS -static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) { struct irq_2_iommu *iommu; - int node; - - node = cpu_to_node(cpu); iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); return iommu; } @@ -48,7 +45,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq) return desc->irq_2_iommu; } -static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; struct irq_2_iommu *irq_iommu; @@ -56,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) /* * alloc irq desc if not allocated already. */ - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); return NULL; @@ -65,14 +62,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) irq_iommu = desc->irq_2_iommu; if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + desc->irq_2_iommu = get_one_free_irq_2_iommu(node); return desc->irq_2_iommu; } static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { - return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); + return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id)); } #else /* !CONFIG_SPARSE_IRQ */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 33389880279..be437c2bc94 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -335,7 +335,7 @@ static int find_unbound_irq(void) if (irq == nr_irqs) panic("No available IRQ to bind to: increase nr_irqs!\n"); - desc = irq_to_desc_alloc_cpu(irq, 0); + desc = irq_to_desc_alloc_node(irq, 0); if (WARN_ON(desc == NULL)) return -1; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 91bb76f44f1..ff374ceface 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,6 +566,6 @@ struct irq_desc; extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 8e4c18b2915..a09baf8f9d9 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -187,7 +187,7 @@ struct irq_desc { spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; - unsigned int cpu; + unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -201,16 +201,16 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); + struct irq_desc *desc, int node); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; #else /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); #endif /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* * Migration helpers for obsolete names, they will go away: @@ -422,12 +422,10 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { #ifdef CONFIG_CPUMASK_OFFSTACK - int node; - if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); @@ -437,8 +435,6 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, return true; } - node = cpu_to_node(cpu); - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; @@ -494,7 +490,7 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { return true; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 3e0cbc44bd7..a6368db2618 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -81,12 +81,10 @@ static struct irq_desc irq_desc_init = { .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), }; -void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int node, int nr) { - int node; void *ptr; - node = cpu_to_node(cpu); ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node); /* @@ -94,33 +92,32 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) * init_copy_kstat_irqs() could still use old one */ if (ptr) { - printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", - cpu, node); + printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node); desc->kstat_irqs = ptr; } } -static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP - desc->cpu = cpu; + desc->node = node; #endif lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_kstat_irqs(desc, cpu, nr_cpu_ids); + init_kstat_irqs(desc, node, nr_cpu_ids); if (!desc->kstat_irqs) { printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } init_desc_masks(desc); - arch_init_chip_data(desc, cpu); + arch_init_chip_data(desc, node); } /* @@ -189,11 +186,10 @@ struct irq_desc *irq_to_desc(unsigned int irq) return NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; unsigned long flags; - int node; if (irq >= nr_irqs) { WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", @@ -212,15 +208,13 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) if (desc) goto out_unlock; - node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", - irq, cpu, node); + printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); if (!desc) { printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - init_one_irq_desc(irq, desc, cpu); + init_one_irq_desc(irq, desc, node); irq_desc_ptrs[irq] = desc; @@ -270,7 +264,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) return (irq < NR_IRQS) ? irq_desc + irq : NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) { return irq_to_desc(irq); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index de5f412f6a9..73468253143 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; -extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); extern void clear_kstat_irqs(struct irq_desc *desc); extern spinlock_t sparse_irq_lock; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ce72bc3f4ce..2f69bee57bf 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -15,9 +15,9 @@ static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, - int cpu, int nr) + int node, int nr) { - init_kstat_irqs(desc, cpu, nr); + init_kstat_irqs(desc, node, nr); if (desc->kstat_irqs != old_desc->kstat_irqs) memcpy(desc->kstat_irqs, old_desc->kstat_irqs, @@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) } static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { memcpy(desc, old_desc, sizeof(struct irq_desc)); - if (!alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " "for migration.\n", irq); return false; } spin_lock_init(&desc->lock); - desc->cpu = cpu; + desc->node = node; lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); - arch_init_copy_chip_data(old_desc, desc, cpu); + arch_init_copy_chip_data(old_desc, desc, node); return true; } @@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) } static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, - int cpu) + int node) { struct irq_desc *desc; unsigned int irq; unsigned long flags; - int node; irq = old_desc->irq; @@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, if (desc && old_desc != desc) goto out_unlock; - node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { printk(KERN_ERR "irq %d: can not get new irq_desc " @@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { + if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) { /* still use old one */ kfree(desc); desc = old_desc; @@ -107,24 +105,14 @@ out_unlock: return desc; } -struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) { - int old_cpu; - int node, old_node; - /* those all static, do move them */ if (desc->irq < NR_IRQS_LEGACY) return desc; - old_cpu = desc->cpu; - if (old_cpu != cpu) { - node = cpu_to_node(cpu); - old_node = cpu_to_node(old_cpu); - if (old_node != node) - desc = __real_move_irq_desc(desc, cpu); - else - desc->cpu = cpu; - } + if (desc->node != node) + desc = __real_move_irq_desc(desc, node); return desc; } diff --git a/kernel/softirq.c b/kernel/softirq.c index b525dd34851..f674f332a02 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -828,7 +828,7 @@ int __init __weak arch_early_irq_init(void) return 0; } -int __weak arch_init_chip_data(struct irq_desc *desc, int cpu) +int __weak arch_init_chip_data(struct irq_desc *desc, int node) { return 0; } -- cgit v1.2.3-70-g09d2 From a2f809b08ae4dddc1015c7dcd8659e5729e45b3e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:01:20 -0700 Subject: irq: change ACPI GSI APIs to also take a device argument We want to use dev_to_node() later on, to be aware of the 'home node' of the GSI in question. [ Impact: cleanup, prepare the IRQ code to be more NUMA aware ] Signed-off-by: Yinghai Lu Acked-by: Len Brown Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Len Brown Cc: Bjorn Helgaas Cc: Tony Luck Cc: linux-acpi@vger.kernel.org Cc: linux-ia64@vger.kernel.org LKML-Reference: <49F65560.20904@kernel.org> Signed-off-by: Ingo Molnar --- arch/ia64/kernel/acpi.c | 5 +++-- arch/x86/include/asm/io_apic.h | 4 ++-- arch/x86/include/asm/mpspec.h | 4 +++- arch/x86/kernel/acpi/boot.c | 8 ++++---- arch/x86/kernel/apic/io_apic.c | 3 ++- drivers/acpi/pci_irq.c | 5 +++-- drivers/char/hpet.c | 4 ++-- drivers/pnp/pnpacpi/rsparser.c | 2 +- include/linux/acpi.h | 2 +- 9 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5510317db37..baec6f00f7f 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) return gsi; @@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) fadt = (struct acpi_table_fadt *)fadt_header; - acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); + acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); return 0; } diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e43601..07f2913ba5d 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -154,8 +154,8 @@ extern int timer_through_8259; extern int io_apic_get_unique_id(int ioapic, int apic_id); extern int io_apic_get_version(int ioapic); extern int io_apic_get_redir_entries(int ioapic); -extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, - int edge_level, int active_high_low); +extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, + int irq, int edge_level, int active_high_low); #endif /* CONFIG_ACPI */ extern int (*ioapic_renumber_irq)(int ioapic, int irq); diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 642fc7fc8cd..3ea1f531f53 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -72,7 +72,9 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); -extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +struct device; +extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, + int active_high_low); extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 723989d7f80..6ee96b5530f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -522,7 +522,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -539,7 +539,7 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, triggering, polarity); + plat_gsi = mp_register_gsi(dev, gsi, triggering, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); @@ -1158,7 +1158,7 @@ void __init mp_config_acpi_legacy_irqs(void) } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { int ioapic; int ioapic_pin; @@ -1253,7 +1253,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) } } #endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi, triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 560b887ba27..d9346622601 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3958,7 +3958,8 @@ int __init io_apic_get_version(int ioapic) } #endif -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq, + int triggering, int polarity) { struct irq_desc *desc; struct irq_cfg *cfg; diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 51b9f8280f8..2faa9e2ac89 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); - acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, + acpi_register_gsi(&dev->dev, dev->irq, + ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); return 0; } else { @@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) } } - rc = acpi_register_gsi(gsi, triggering, polarity); + rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (rc < 0) { dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", pin_name(pin)); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 340ba4f9dc5..4a9f3492b92 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp) break; } - gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE, + gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); if (gsi > 0) break; @@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) irqp = &res->data.extended_irq; for (i = 0; i < irqp->interrupt_count; i++) { - irq = acpi_register_gsi(irqp->interrupts[i], + irq = acpi_register_gsi(NULL, irqp->interrupts[i], irqp->triggering, irqp->polarity); if (irq < 0) return AE_ERROR; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index adf17856bac..7f207f335be 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, } flags = irq_flags(triggering, polarity, shareable); - irq = acpi_register_gsi(gsi, triggering, polarity); + irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (irq >= 0) pcibios_penalize_isa_irq(irq, 1); else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890ee3c..51b4b0a5ce8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_realmode_flags; -int acpi_register_gsi (u32 gsi, int triggering, int polarity); +int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC -- cgit v1.2.3-70-g09d2 From d047f53a2ecce37e3bdf79eac5a326fbaadb3628 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:02:23 -0700 Subject: x86/irq: change MSI irq_desc to be more numa aware Try to get irq_desc on the home node in create_irq_nr(). v2: don't check if we can move it when sparse_irq is not used v3: use move_irq_des, if that node is not what we want [ Impact: optimization, make MSI IRQ descriptors more NUMA aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F6559F.7070005@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 17 +++++++++++++---- include/linux/irq.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 82376e021b5..9cd4806cdf5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3172,14 +3172,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int irq_want) +unsigned int create_irq_nr(unsigned int irq_want, int node) { /* Allocate an unused irq */ unsigned int irq; unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int node = cpu_to_node(boot_cpu_id); struct irq_desc *desc_new = NULL; irq = 0; @@ -3197,6 +3196,13 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; + +#ifdef CONFIG_NUMA_IRQ_DESC + /* different node ?*/ + if (desc_new->node != node) + desc = move_irq_desc(desc, node); +#endif + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; @@ -3214,11 +3220,12 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { + int node = cpu_to_node(boot_cpu_id); unsigned int irq_want; int irq; irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) irq = -1; @@ -3476,15 +3483,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) unsigned int irq_want; struct intel_iommu *iommu = NULL; int index = 0; + int node; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) return -1; irq_want = irq + 1; diff --git a/include/linux/irq.h b/include/linux/irq.h index a09baf8f9d9..4b95ddb5304 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -376,7 +376,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ -extern unsigned int create_irq_nr(unsigned int irq_want); +extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -- cgit v1.2.3-70-g09d2 From 6a321cb370ad3db4ba6e405e638b3a42c41089b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 04:43:42 -0700 Subject: net: netif_tx_queue_stopped too expensive netif_tx_queue_stopped(txq) is most of the time false. Yet its cost is very expensive on SMP. static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); } I saw this on oprofile hunting and bnx2 driver bnx2_tx_int(). We probably should split "struct netdev_queue" in two parts, one being read mostly. __netif_tx_lock() touches _xmit_lock & xmit_lock_owner, these deserve a separate cache line. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8c3619d551..505a3c6cb12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -447,12 +447,18 @@ enum netdev_queue_state_t }; struct netdev_queue { +/* + * read mostly part + */ struct net_device *dev; struct Qdisc *qdisc; unsigned long state; - spinlock_t _xmit_lock; - int xmit_lock_owner; struct Qdisc *qdisc_sleeping; +/* + * write mostly part + */ + spinlock_t _xmit_lock ____cacheline_aligned_in_smp; + int xmit_lock_owner; } ____cacheline_aligned_in_smp; -- cgit v1.2.3-70-g09d2 From abc5c44d6284fab8fb21bcfc52c0f16f980637df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:31:25 -0400 Subject: SUNRPC: Fix error return value of svc_addr_len() The svc_addr_len() helper function returns -EAFNOSUPPORT if it doesn't recognize the address family of the passed-in socket address. However, the return type of this function is size_t, which means -EAFNOSUPPORT is turned into a very large positive value in this case. The check in svc_udp_recvfrom() to see if the return value is less than zero therefore won't work at all. Additionally, handle_connect_req() passes this value directly to memset(). This could cause memset() to clobber a large chunk of memory if svc_addr_len() has returned an error. Currently the address family of these addresses, however, is known to be supported long before handle_connect_req() is called, so this isn't a real risk. Change the error return value of svc_addr_len() to zero, which fits in the range of size_t, and is safer to pass to memset() directly. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 5 +++-- net/sunrpc/svcsock.c | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0d9cb6ef28b..d790c52525c 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -118,7 +118,7 @@ static inline unsigned short svc_addr_port(const struct sockaddr *sa) return 0; } -static inline size_t svc_addr_len(struct sockaddr *sa) +static inline size_t svc_addr_len(const struct sockaddr *sa) { switch (sa->sa_family) { case AF_INET: @@ -126,7 +126,8 @@ static inline size_t svc_addr_len(struct sockaddr *sa) case AF_INET6: return sizeof(struct sockaddr_in6); } - return -EAFNOSUPPORT; + + return 0; } static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c1..8b083283413 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -426,13 +426,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@ -464,8 +465,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); -- cgit v1.2.3-70-g09d2 From 335c54bdc4d3bacdbd619ec95cd0b352435bd37f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:25 -0400 Subject: NFSD: Prevent a buffer overflow in svc_xprt_names() The svc_xprt_names() function can overflow its buffer if it's so near the end of the passed in buffer that the "name too long" string still doesn't fit. Of course, it could never tell if it was near the end of the passed in buffer, since its only caller passes in zero as the buffer length. Let's make this API a little safer. Change svc_xprt_names() so it *always* checks for a buffer overflow, and change its only caller to pass in the correct buffer length. If svc_xprt_names() does overflow its buffer, it now fails with an ENAMETOOLONG errno, instead of trying to write a message at the end of the buffer. I don't like this much, but I can't figure out a clean way that's always safe to return some of the names, *and* an indication that the buffer was not long enough. The displayed error when doing a 'cat /proc/fs/nfsd/portlist' is "File name too long". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svc_xprt.h | 2 +- net/sunrpc/svc_xprt.c | 56 ++++++++++++++++++++++++++++------------- 3 files changed, 41 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e051847b93f..6a1cd908e6b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -918,7 +918,7 @@ static ssize_t __write_ports_names(char *buf) { if (nfsd_serv == NULL) return 0; - return svc_xprt_names(nfsd_serv, buf, 0); + return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index d790c52525c..2223ae0b5ed 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -83,7 +83,7 @@ int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, const sa_family_t af, const unsigned short port); -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f200393ac87..6f33d33cc06 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1098,36 +1098,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, } EXPORT_SYMBOL_GPL(svc_find_xprt); -/* - * Format a buffer with a list of the active transports. A zero for - * the buflen parameter disables target buffer overflow checking. +static int svc_one_xprt_name(const struct svc_xprt *xprt, + char *pos, int remaining) +{ + int len; + + len = snprintf(pos, remaining, "%s %u\n", + xprt->xpt_class->xcl_name, + svc_xprt_local_port(xprt)); + if (len >= remaining) + return -ENAMETOOLONG; + return len; +} + +/** + * svc_xprt_names - format a buffer with a list of transport names + * @serv: pointer to an RPC service + * @buf: pointer to a buffer to be filled in + * @buflen: length of buffer to be filled in + * + * Fills in @buf with a string containing a list of transport names, + * each name terminated with '\n'. + * + * Returns positive length of the filled-in string on success; otherwise + * a negative errno value is returned if an error occurs. */ -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; - char xprt_str[64]; - int totlen = 0; - int len; + int len, totlen; + char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); + + pos = buf; + totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - len = snprintf(xprt_str, sizeof(xprt_str), - "%s %d\n", xprt->xpt_class->xcl_name, - svc_xprt_local_port(xprt)); - /* If the string was truncated, replace with error string */ - if (len >= sizeof(xprt_str)) - strcpy(xprt_str, "name-too-long\n"); - /* Don't overflow buffer */ - len = strlen(xprt_str); - if (buflen && (len + totlen >= buflen)) + len = svc_one_xprt_name(xprt, pos, buflen - totlen); + if (len < 0) { + *buf = '\0'; + totlen = len; + } + if (len <= 0) break; - strcpy(buf+totlen, xprt_str); + + pos += len; totlen += len; } + spin_unlock_bh(&serv->sv_lock); return totlen; } -- cgit v1.2.3-70-g09d2 From bfba9ab4c64f0e5c33930711e6c073c285e01fcf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:33 -0400 Subject: SUNRPC: pass buffer size to svc_addsock() Adjust the synopsis of svc_addsock() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svcsock.h | 3 ++- net/sunrpc/svcsock.c | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6a1cd908e6b..1f1c2159b80 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -943,7 +943,7 @@ static ssize_t __write_ports_addfd(char *buf) if (err != 0) goto out; - err = svc_addsock(nfsd_serv, fd, buf); + err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) lockd_down(); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 483e10380aa..e23241c53f4 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -39,7 +39,8 @@ int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); -int svc_addsock(struct svc_serv *serv, int fd, char *name_return); +int svc_addsock(struct svc_serv *serv, const int fd, + char *name_return, const size_t len); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8b083283413..6bec1e25b54 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1128,9 +1128,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) +/** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); -- cgit v1.2.3-70-g09d2 From 8435d34dbbe75678c3cdad3d53b1e7996a79b3bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:40 -0400 Subject: SUNRPC: pass buffer size to svc_sock_names() Adjust the synopsis of svc_sock_names() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 3 ++- include/linux/sunrpc/svcsock.h | 4 +++- net/sunrpc/svcsock.c | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f1c2159b80..b64a7fbfccf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -966,7 +966,8 @@ static ssize_t __write_ports_delfd(char *buf) return -ENOMEM; if (nfsd_serv != NULL) - len = svc_sock_names(buf, nfsd_serv, toclose); + len = svc_sock_names(nfsd_serv, buf, + SIMPLE_TRANSACTION_LIMIT, toclose); if (len >= 0) lockd_down(); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index e23241c53f4..82716313894 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -38,7 +38,9 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); -int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); +int svc_sock_names(struct svc_serv *serv, char *buf, + const size_t buflen, + const char *toclose); int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, const size_t len); void svc_init_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6bec1e25b54..032b52ea954 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -259,8 +259,23 @@ static int one_sock_name(char *buf, struct svc_sock *svsk) return len; } -int -svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +/** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. + */ +int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; -- cgit v1.2.3-70-g09d2 From 4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 24 Apr 2009 17:30:20 -0700 Subject: Intel IOMMU Pass Through Support The patch adds kernel parameter intel_iommu=pt to set up pass through mode in context mapping entry. This disables DMAR in linux kernel; but KVM still runs on VT-d and interrupt remapping still works. In this mode, kernel uses swiotlb for DMA API functions but other VT-d functionalities are enabled for KVM. KVM always uses multi level translation page table in VT-d. By default, pass though mode is disabled in kernel. This is useful when people don't want to enable VT-d DMAR in kernel but still want to use KVM and interrupt remapping for reasons like DMAR performance concern or debug purpose. Signed-off-by: Fenghua Yu Acked-by: Weidong Han Signed-off-by: David Woodhouse --- Documentation/kernel-parameters.txt | 1 + arch/ia64/include/asm/iommu.h | 1 + arch/ia64/kernel/pci-swiotlb.c | 2 +- arch/x86/include/asm/iommu.h | 1 + arch/x86/kernel/pci-dma.c | 6 ++ arch/x86/kernel/pci-swiotlb.c | 3 +- drivers/pci/dmar.c | 11 ++- drivers/pci/intel-iommu.c | 180 ++++++++++++++++++++++++++---------- include/linux/dma_remapping.h | 8 ++ include/linux/intel-iommu.h | 2 + 10 files changed, 165 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 600cdd72900..fa4faeb7597 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -965,6 +965,7 @@ and is between 256 and 4096 characters. It is defined in the file nomerge forcesac soft + pt [x86, IA64] io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 0490794fe4a..37d41ca5645 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -9,6 +9,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c..223abb13410 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void) void __init pci_swiotlb_init(void) { - if (!iommu_detected) { + if (!iommu_detected || iommu_pass_through) { #ifdef CONFIG_IA64_GENERIC swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b..fd6d21bbee6 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc825..8cad0d85424 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -160,6 +160,8 @@ again: return page_address(page); } +extern int iommu_pass_through; + /* * See for the iommu kernel parameter * documentation. @@ -209,6 +211,10 @@ static __init int iommu_setup(char *p) #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) swiotlb = 1; + if (!strncmp(p, "pt", 2)) { + iommu_pass_through = 1; + return 1; + } #endif gart_parse_options(p); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 221a3853e26..3a0c51e0ba6 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) + if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || + iommu_pass_through) swiotlb = 1; #endif if (swiotlb_force) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index fa3a11365ec..d3d86b749ee 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -515,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) u32 ver; static int iommu_allocated = 0; int agaw = 0; + int msagaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -535,12 +536,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", + "Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto error; + } + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + printk(KERN_ERR + "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto error; } #endif iommu->agaw = agaw; + iommu->msagaw = msagaw; /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 001b328adf8..13121821db7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,6 +53,8 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define MAX_AGAW_WIDTH 64 + #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -127,8 +129,6 @@ static inline void context_set_fault_enable(struct context_entry *context) context->lo &= (((u64)-1) << 2) | 1; } -#define CONTEXT_TT_MULTI_LEVEL 0 - static inline void context_set_translation_type(struct context_entry *context, unsigned long value) { @@ -288,6 +288,7 @@ int dmar_disabled = 1; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +int iommu_pass_through; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -397,17 +398,13 @@ void free_iova_mem(struct iova *iova) static inline int width_to_agaw(int width); -/* calculate agaw for each iommu. - * "SAGAW" may be different across iommus, use a default agaw, and - * get a supported less agaw for iommus that don't support the default agaw. - */ -int iommu_calculate_agaw(struct intel_iommu *iommu) +static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; int agaw = -1; sagaw = cap_sagaw(iommu->cap); - for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); + for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { if (test_bit(agaw, &sagaw)) break; @@ -416,6 +413,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return agaw; } +/* + * Calculate max SAGAW for each iommu. + */ +int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); +} + +/* + * calculate agaw for each iommu. + * "SAGAW" may be different across iommus, use a default agaw, and + * get a supported less agaw for iommus that don't support the default agaw. + */ +int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); +} + /* in native case, each domain is related to only one iommu */ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { @@ -1321,8 +1336,8 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -static int domain_context_mapping_one(struct dmar_domain *domain, - int segment, u8 bus, u8 devfn) +static int domain_context_mapping_one(struct dmar_domain *domain, int segment, + u8 bus, u8 devfn, int translation) { struct context_entry *context; unsigned long flags; @@ -1335,7 +1350,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + BUG_ON(!domain->pgd); + BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && + translation != CONTEXT_TT_MULTI_LEVEL); iommu = device_to_iommu(segment, bus, devfn); if (!iommu) @@ -1395,9 +1413,18 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_set_domain_id(context, id); - context_set_address_width(context, iommu->agaw); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); + + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. + */ + if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) { + context_set_address_width(context, iommu->agaw); + context_set_address_root(context, virt_to_phys(pgd)); + } else + context_set_address_width(context, iommu->msagaw); + + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -1422,13 +1449,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) +domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, + int translation) { int ret; struct pci_dev *tmp, *parent; ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn); + pdev->bus->number, pdev->devfn, + translation); if (ret) return ret; @@ -1442,7 +1471,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) ret = domain_context_mapping_one(domain, pci_domain_nr(parent->bus), parent->bus->number, - parent->devfn); + parent->devfn, translation); if (ret) return ret; parent = parent->bus->self; @@ -1450,12 +1479,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->subordinate), - tmp->subordinate->number, 0); + tmp->subordinate->number, 0, + translation); else /* this is a legacy PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->bus), tmp->bus->number, - tmp->devfn); + tmp->devfn, + translation); } static int domain_context_mapped(struct pci_dev *pdev) @@ -1752,7 +1783,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, pdev); + ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (!ret) return 0; error: @@ -1853,6 +1884,23 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ +/* Initialize each context entry as pass through.*/ +static int __init init_context_pass_through(void) +{ + struct pci_dev *pdev = NULL; + struct dmar_domain *domain; + int ret; + + for_each_pci_dev(pdev) { + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_PASS_THROUGH); + if (ret) + return ret; + } + return 0; +} + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -1860,6 +1908,7 @@ static int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; + int pass_through = 1; /* * for each drhd @@ -1913,7 +1962,15 @@ static int __init init_dmars(void) printk(KERN_ERR "IOMMU: allocate root entry failed\n"); goto error; } + if (!ecap_pass_through(iommu->ecap)) + pass_through = 0; } + if (iommu_pass_through) + if (!pass_through) { + printk(KERN_INFO + "Pass Through is not supported by hardware.\n"); + iommu_pass_through = 0; + } /* * Start from the sane iommu hardware state. @@ -1976,37 +2033,57 @@ static int __init init_dmars(void) "IOMMU: enable interrupt remapping failed\n"); } #endif + /* + * If pass through is set and enabled, context entries of all pci + * devices are intialized by pass through translation type. + */ + if (iommu_pass_through) { + ret = init_context_pass_through(); + if (ret) { + printk(KERN_ERR "IOMMU: Pass through init failed.\n"); + iommu_pass_through = 0; + } + } /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa. */ - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* some BIOS lists non-exist devices in DMAR table */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR + if (!iommu_pass_through) { + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. + */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + } } - } - iommu_prepare_gfx_mapping(); + iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); + iommu_prepare_isa(); + } /* * for each drhd @@ -2117,7 +2194,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev) /* make sure context mapping is ok */ if (unlikely(!domain_context_mapped(pdev))) { - ret = domain_context_mapping(domain, pdev); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_MULTI_LEVEL); if (ret) { printk(KERN_ERR "Domain context map for %s failed", @@ -2786,7 +2864,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -2806,7 +2884,15 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; - dma_ops = &intel_dma_ops; + + if (!iommu_pass_through) { + printk(KERN_INFO + "Multi-level page-table translation for DMAR.\n"); + dma_ops = &intel_dma_ops; + } else + printk(KERN_INFO + "DMAR: Pass through translation for DMAR.\n"); + init_iommu_sysfs(); register_iommu(&intel_iommu_ops); @@ -3146,7 +3232,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_context_mapping(dmar_domain, pdev); + ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (ret) return ret; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 1a455f1f86d..e0a03aff63d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -13,6 +13,9 @@ #define DMA_PTE_WRITE (2) #define DMA_PTE_SNP (1 << 11) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_PASS_THROUGH 2 + struct intel_iommu; struct dmar_domain; struct root_entry; @@ -21,11 +24,16 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); #ifdef CONFIG_DMAR extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { return 0; } +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} #endif extern int dmar_disabled; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index aa8c5317123..7246971a7fe 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -120,6 +120,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) +#define ecap_pass_through(e) ((e >> 6) & 0x1) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) @@ -302,6 +303,7 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + int msagaw; /* max sagaw of this iommu */ unsigned int irq; unsigned char name[13]; /* Device Name */ -- cgit v1.2.3-70-g09d2 From 0f9a623dd6c9b5b4dd00c232f29525bfc7a8ecf2 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:51 +0100 Subject: tracing: x86, mmiotrace: only register for die notifier when tracer active Follow up to afcfe024aebd74b0984a41af9a34e009cf5badaf in Linus' tree ("x86: mmiotrace: quieten spurious warning message") Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-5-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 27 ++++++++++++++++++++++----- arch/x86/mm/mmio-mod.c | 2 ++ include/linux/mmiotrace.h | 2 ++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index a769d1a2d93..256ce643b0b 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -311,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + /* + * debug traps without an active context are due to either + * something external causing them (f.e. using a debugger while + * mmio tracing enabled), or erroneous behaviour + */ + pr_warning("kmmio: unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } @@ -529,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p) } EXPORT_SYMBOL(unregister_kmmio_probe); -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) +static int +kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; @@ -545,11 +550,23 @@ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -static int __init init_kmmio(void) +int kmmio_init(void) { int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); } -fs_initcall(init_kmmio); /* should be before device_initcall() */ + +void kmmio_cleanup(void) +{ + int i; + + unregister_die_notifier(&nb_die); + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { + WARN_ONCE(!list_empty(&kmmio_page_table[i]), + KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); + } +} diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index c9342ed8b40..132772a8ec5 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -451,6 +451,7 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); + kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); @@ -473,6 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); + kmmio_cleanup(); pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 3d1b7bde128..97491f78b08 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -30,6 +30,8 @@ extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +extern int kmmio_init(void); +extern void kmmio_cleanup(void); #ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.3-70-g09d2 From 30e673b230f9d556eb81ef68a7b1a08c8b3b142c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:47 -0500 Subject: tracing/filters: move preds into event_filter object Create a new event_filter object, and move the pred-related members out of the call and subsystem objects and into the filter object - the details of the filter implementation don't need to be exposed in the call and subsystem in any case, and it will also help make the new parser implementation a little cleaner. [ Impact: refactor trace-filter code to prepare for new features ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905887.6416.119.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 +- kernel/trace/trace.h | 10 ++-- kernel/trace/trace_events.c | 3 +- kernel/trace/trace_events_filter.c | 107 +++++++++++++++++++++++-------------- 4 files changed, 76 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 78a9ba24cbf..46a27f2695a 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,8 +101,8 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; - int n_preds; - struct filter_pred **preds; + int filter_active; + void *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7d55bcf50e4..1fb7d6ccadf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -731,12 +731,16 @@ struct ftrace_event_field { int size; }; +struct event_filter { + int n_preds; + struct filter_pred **preds; +}; + struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; - int n_preds; - struct filter_pred **preds; + void *filter; }; struct filter_pred; @@ -774,7 +778,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index be4d3a437c1..1cd1f37373d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -757,8 +757,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) list_add(&system->list, &event_subsystems); - system->preds = NULL; - system->n_preds = 0; + system->filter = NULL; entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 65418288f95..1e861eca3d0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -93,11 +93,12 @@ static int filter_pred_none(struct filter_pred *pred, void *event) /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct ftrace_event_call *call, void *rec) { + struct event_filter *filter = call->filter; int i, matched, and_failed = 0; struct filter_pred *pred; - for (i = 0; i < call->n_preds; i++) { - pred = call->preds[i]; + for (i = 0; i < filter->n_preds; i++) { + pred = filter->preds[i]; if (and_failed && !pred->or) continue; matched = pred->fn(pred, rec); @@ -115,20 +116,20 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) } EXPORT_SYMBOL_GPL(filter_match_preds); -static void __filter_print_preds(struct filter_pred **preds, int n_preds, +static void __filter_print_preds(struct event_filter *filter, struct trace_seq *s) { - char *field_name; struct filter_pred *pred; + char *field_name; int i; - if (!n_preds) { + if (!filter || !filter->n_preds) { trace_seq_printf(s, "none\n"); return; } - for (i = 0; i < n_preds; i++) { - pred = preds[i]; + for (i = 0; i < filter->n_preds; i++) { + pred = filter->preds[i]; field_name = pred->field_name; if (i) trace_seq_printf(s, pred->or ? "|| " : "&& "); @@ -144,7 +145,7 @@ static void __filter_print_preds(struct filter_pred **preds, int n_preds, void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s) { mutex_lock(&filter_mutex); - __filter_print_preds(call->preds, call->n_preds, s); + __filter_print_preds(call->filter, s); mutex_unlock(&filter_mutex); } @@ -152,7 +153,7 @@ void filter_print_subsystem_preds(struct event_subsystem *system, struct trace_seq *s) { mutex_lock(&filter_mutex); - __filter_print_preds(system->preds, system->n_preds, s); + __filter_print_preds(system->filter, s); mutex_unlock(&filter_mutex); } @@ -200,12 +201,14 @@ static int filter_set_pred(struct filter_pred *dest, static void __filter_disable_preds(struct ftrace_event_call *call) { + struct event_filter *filter = call->filter; int i; - call->n_preds = 0; + call->filter_active = 0; + filter->n_preds = 0; for (i = 0; i < MAX_FILTER_PRED; i++) - call->preds[i]->fn = filter_pred_none; + filter->preds[i]->fn = filter_pred_none; } void filter_disable_preds(struct ftrace_event_call *call) @@ -217,32 +220,39 @@ void filter_disable_preds(struct ftrace_event_call *call) int init_preds(struct ftrace_event_call *call) { + struct event_filter *filter; struct filter_pred *pred; int i; - call->n_preds = 0; - - call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!call->preds) + filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!call->filter) return -ENOMEM; + call->filter_active = 0; + filter->n_preds = 0; + + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); + if (!filter->preds) + goto oom; + for (i = 0; i < MAX_FILTER_PRED; i++) { pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) goto oom; pred->fn = filter_pred_none; - call->preds[i] = pred; + filter->preds[i] = pred; } return 0; oom: for (i = 0; i < MAX_FILTER_PRED; i++) { - if (call->preds[i]) - filter_free_pred(call->preds[i]); + if (filter->preds[i]) + filter_free_pred(filter->preds[i]); } - kfree(call->preds); - call->preds = NULL; + kfree(filter->preds); + kfree(call->filter); + call->filter = NULL; return -ENOMEM; } @@ -250,15 +260,16 @@ EXPORT_SYMBOL_GPL(init_preds); static void __filter_free_subsystem_preds(struct event_subsystem *system) { + struct event_filter *filter = system->filter; struct ftrace_event_call *call; int i; - if (system->n_preds) { - for (i = 0; i < system->n_preds; i++) - filter_free_pred(system->preds[i]); - kfree(system->preds); - system->preds = NULL; - system->n_preds = 0; + if (filter && filter->n_preds) { + for (i = 0; i < filter->n_preds; i++) + filter_free_pred(filter->preds[i]); + kfree(filter->preds); + kfree(filter); + system->filter = NULL; } list_for_each_entry(call, &ftrace_events, list) { @@ -281,21 +292,23 @@ static int filter_add_pred_fn(struct ftrace_event_call *call, struct filter_pred *pred, filter_pred_fn_t fn) { + struct event_filter *filter = call->filter; int idx, err; - if (call->n_preds && !pred->compound) + if (filter->n_preds && !pred->compound) __filter_disable_preds(call); - if (call->n_preds == MAX_FILTER_PRED) + if (filter->n_preds == MAX_FILTER_PRED) return -ENOSPC; - idx = call->n_preds; - filter_clear_pred(call->preds[idx]); - err = filter_set_pred(call->preds[idx], pred, fn); + idx = filter->n_preds; + filter_clear_pred(filter->preds[idx]); + err = filter_set_pred(filter->preds[idx], pred, fn); if (err) return err; - call->n_preds++; + filter->n_preds++; + call->filter_active = 1; return 0; } @@ -366,29 +379,41 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred) { + struct event_filter *filter = system->filter; struct ftrace_event_call *call; mutex_lock(&filter_mutex); - if (system->n_preds && !pred->compound) + if (filter && filter->n_preds && !pred->compound) { __filter_free_subsystem_preds(system); + filter = NULL; + } - if (!system->n_preds) { - system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + if (!filter) { + system->filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!system->filter) { + mutex_unlock(&filter_mutex); + return -ENOMEM; + } + filter = system->filter; + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!system->preds) { + + if (!filter->preds) { + kfree(system->filter); + system->filter = NULL; mutex_unlock(&filter_mutex); return -ENOMEM; } } - if (system->n_preds == MAX_FILTER_PRED) { + if (filter->n_preds == MAX_FILTER_PRED) { mutex_unlock(&filter_mutex); return -ENOSPC; } - system->preds[system->n_preds] = pred; - system->n_preds++; + filter->preds[filter->n_preds] = pred; + filter->n_preds++; list_for_each_entry(call, &ftrace_events, list) { int err; @@ -401,8 +426,8 @@ int filter_add_subsystem_pred(struct event_subsystem *system, err = __filter_add_pred(call, pred); if (err == -ENOMEM) { - system->preds[system->n_preds] = NULL; - system->n_preds--; + filter->preds[filter->n_preds] = NULL; + filter->n_preds--; mutex_unlock(&filter_mutex); return err; } -- cgit v1.2.3-70-g09d2 From a118e4d1402f1349fe3d953493e4168a300a752d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:53 -0500 Subject: tracing/filters: distinguish between signed and unsigned fields The new filter comparison ops need to be able to distinguish between signed and unsigned field types, so add an is_signed flag/param to the event field struct/trace_define_fields(). Also define a simple macro, is_signed_type() to determine the signedness at compile time, used in the trace macros. If the is_signed_type() macro won't work with a specific type, a new slightly modified version of TRACE_FIELD() called TRACE_FIELD_SIGN(), allows the signedness to be set explicitly. [ Impact: extend trace-filter code for new feature ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905893.6416.120.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 7 ++++--- include/trace/ftrace.h | 16 ++++++++-------- kernel/trace/trace.h | 1 + kernel/trace/trace_event_types.h | 4 ++-- kernel/trace/trace_events.c | 3 ++- kernel/trace/trace_export.c | 29 ++++++++++++++++++++++------- 6 files changed, 39 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46a27f2695a..e61a7403f3d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -122,8 +122,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, struct ring_buffer_event *event); extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); + char *name, int offset, int size, int is_signed); +#define is_signed_type(type) (((type)(-1)) < 0) /* * The double __builtin_constant_p is because gcc will give us an error @@ -144,10 +145,10 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item) \ +#define __common_field(type, item, is_signed) \ ret = trace_define_field(event_call, #type, "common_" #item, \ offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ + sizeof(field.ent.item), is_signed); \ if (ret) \ return ret; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1e681142f1d..edb02bc9f8f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -225,7 +225,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (ret) \ return ret; @@ -234,7 +234,7 @@ ftrace_format_##call(struct trace_seq *s) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), 0); \ if (ret) \ return ret; @@ -242,7 +242,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __string(item, src) \ ret = trace_define_field(event_call, "__str_loc", #item, \ offsetof(typeof(field), __str_loc_##item), \ - sizeof(field.__str_loc_##item)); + sizeof(field.__str_loc_##item), 0); #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ @@ -253,11 +253,11 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(int, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ + __common_field(int, type, 1); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ \ tstruct; \ \ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1fb7d6ccadf..866d0108fd2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -729,6 +729,7 @@ struct ftrace_event_field { char *type; int offset; int size; + int is_signed; }; struct event_filter { diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index cfcecc4fd86..5e32e375134 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -141,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_STRUCT( - TRACE_FIELD(ktime_t, state_data.stamp, stamp) - TRACE_FIELD(ktime_t, state_data.end, end) + TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) + TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1cd1f37373d..bbbea747937 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -26,7 +26,7 @@ static DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size) + char *name, int offset, int size, int is_signed) { struct ftrace_event_field *field; @@ -44,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type, field->offset = offset; field->size = size; + field->is_signed = is_signed; list_add(&field->link, &call->fields); return 0; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 0cb1a142c74..d06cf898dc8 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -50,6 +50,9 @@ extern void __bad_type_size(void); if (!ret) \ return 0; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) #undef TP_RAW_FMT #define TP_RAW_FMT(args...) args @@ -98,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) + #undef TP_CMD #define TP_CMD(cmd...) cmd @@ -149,7 +156,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (ret) \ return ret; @@ -157,7 +164,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), 0); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed); \ if (ret) \ return ret; @@ -173,11 +188,11 @@ ftrace_define_fields_##call(void) \ struct args field; \ int ret; \ \ - __common_field(unsigned char, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ + __common_field(unsigned char, type, 0); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ \ tstruct; \ \ -- cgit v1.2.3-70-g09d2 From 8b3725621074040d380664964ffbc40610aef8c6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:59 -0500 Subject: tracing/filters: a better event parser Replace the current event parser hack with a better one. Filters are no longer specified predicate by predicate, but all at once and can use parens and any of the following operators: numeric fields: ==, !=, <, <=, >, >= string fields: ==, != predicates can be combined with the logical operators: &&, || examples: "common_preempt_count > 4" > filter "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter If there was an error, the erroneous string along with an error message can be seen by looking at the filter e.g.: ((sig >= 10 && sig < 15) || dsig == 17) && comm != bash ^ parse_error: Field not found Currently the caret for an error always appears at the beginning of the filter; a real position should be used, but the error message should be useful even without it. To clear a filter, '0' can be written to the filter file. Filters can also be set or cleared for a complete subsystem by writing the same filter as would be written to an individual event to the filter file at the root of the subsytem. Note however, that if any event in the subsystem lacks a field specified in the filter being set, the set will fail and all filters in the subsytem are automatically cleared. This change from the previous version was made because using only the fields that happen to exist for a given event would most likely result in a meaningless filter. Because the logical operators are now implemented as predicates, the maximum number of predicates in a filter was increased from 8 to 16. [ Impact: add new, extended trace-filter implementation ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905899.6416.121.camel@tropicana> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace.h | 66 ++- kernel/trace/trace_events.c | 86 ++- kernel/trace/trace_events_filter.c | 1020 ++++++++++++++++++++++++++++-------- 4 files changed, 884 insertions(+), 290 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index e61a7403f3d..5fff40c9ff5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,7 +112,7 @@ struct ftrace_event_call { #endif }; -#define MAX_FILTER_PRED 8 +#define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 extern int init_preds(struct ftrace_event_call *call); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 866d0108fd2..7736fe8c1b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -735,6 +735,7 @@ struct ftrace_event_field { struct event_filter { int n_preds; struct filter_pred **preds; + char *filter_string; }; struct event_subsystem { @@ -746,7 +747,8 @@ struct event_subsystem { struct filter_pred; -typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, + int val1, int val2); struct filter_pred { filter_pred_fn_t fn; @@ -756,23 +758,18 @@ struct filter_pred { char *field_name; int offset; int not; - int or; - int compound; - int clear; + int op; + int pop_n; }; -extern void filter_free_pred(struct filter_pred *pred); -extern void filter_print_preds(struct ftrace_event_call *call, +extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); -extern int filter_parse(char **pbuf, struct filter_pred *pred); -extern int filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred); -extern void filter_disable_preds(struct ftrace_event_call *call); -extern void filter_free_subsystem_preds(struct event_subsystem *system); -extern void filter_print_subsystem_preds(struct event_subsystem *system, +extern int apply_event_filter(struct ftrace_event_call *call, + char *filter_string); +extern int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string); +extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); -extern int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred); static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, @@ -787,6 +784,47 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index bbbea747937..f789ca540fe 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -492,7 +492,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(call, s); + print_event_filter(call, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -505,40 +505,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_disable_preds(call); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_pred(call, pred); - if (err < 0) { - filter_free_pred(pred); + err = apply_event_filter(call, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } - - filter_free_pred(pred); *ppos += cnt; @@ -562,7 +548,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_subsystem_preds(system, s); + print_subsystem_event_filter(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -575,38 +561,26 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct event_subsystem *system = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_free_subsystem_preds(system); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_subsystem_pred(system, pred); - if (err < 0) { - filter_free_pred(pred); + err = apply_subsystem_event_filter(system, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } *ppos += cnt; @@ -760,11 +734,21 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->filter = NULL; + system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); + if (!system->filter) { + pr_warning("Could not allocate filter for subsystem " + "'%s'\n", name); + return system->entry; + } + entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); - if (!entry) + if (!entry) { + kfree(system->filter); + system->filter = NULL; pr_warning("Could not create debugfs " "'%s/filter' entry\n", name); + } return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1e861eca3d0..f49486687ee 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -29,51 +29,130 @@ static DEFINE_MUTEX(filter_mutex); -static int filter_pred_64(struct filter_pred *pred, void *event) +enum filter_op_ids { - u64 *addr = (u64 *)(event + pred->offset); - u64 val = (u64)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; -} - -static int filter_pred_32(struct filter_pred *pred, void *event) -{ - u32 *addr = (u32 *)(event + pred->offset); - u32 val = (u32)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; -} - -static int filter_pred_16(struct filter_pred *pred, void *event) + OP_OR, + OP_AND, + OP_NE, + OP_EQ, + OP_LT, + OP_LE, + OP_GT, + OP_GE, + OP_NONE, + OP_OPEN_PAREN, +}; + +struct filter_op { + int id; + char *string; + int precedence; +}; + +static struct filter_op filter_ops[] = { + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, +}; + +enum { + FILT_ERR_NONE, + FILT_ERR_INVALID_OP, + FILT_ERR_UNBALANCED_PAREN, + FILT_ERR_TOO_MANY_OPERANDS, + FILT_ERR_OPERAND_TOO_LONG, + FILT_ERR_FIELD_NOT_FOUND, + FILT_ERR_ILLEGAL_FIELD_OP, + FILT_ERR_ILLEGAL_INTVAL, + FILT_ERR_BAD_SUBSYS_FILTER, + FILT_ERR_TOO_MANY_PREDS, + FILT_ERR_MISSING_FIELD, + FILT_ERR_INVALID_FILTER, +}; + +static char *err_text[] = { + "No error", + "Invalid operator", + "Unbalanced parens", + "Too many operands", + "Operand too long", + "Field not found", + "Illegal operation for field type", + "Illegal integer value", + "Couldn't find or set field in one of a subsystem's events", + "Too many terms in predicate expression", + "Missing field name and/or value", + "Meaningless filter expression", +}; + +struct opstack_op { + int op; + struct list_head list; +}; + +struct postfix_elt { + int op; + char *operand; + struct list_head list; +}; + +struct filter_parse_state { + struct filter_op *ops; + struct list_head opstack; + struct list_head postfix; + int lasterr; + int lasterr_pos; + + struct { + char *string; + unsigned int cnt; + unsigned int tail; + } infix; + + struct { + char string[MAX_FILTER_STR_VAL]; + int pos; + unsigned int tail; + } operand; +}; + +DEFINE_COMPARISON_PRED(s64); +DEFINE_COMPARISON_PRED(u64); +DEFINE_COMPARISON_PRED(s32); +DEFINE_COMPARISON_PRED(u32); +DEFINE_COMPARISON_PRED(s16); +DEFINE_COMPARISON_PRED(u16); +DEFINE_COMPARISON_PRED(s8); +DEFINE_COMPARISON_PRED(u8); + +DEFINE_EQUALITY_PRED(64); +DEFINE_EQUALITY_PRED(32); +DEFINE_EQUALITY_PRED(16); +DEFINE_EQUALITY_PRED(8); + +static int filter_pred_and(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) { - u16 *addr = (u16 *)(event + pred->offset); - u16 val = (u16)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; + return val1 && val2; } -static int filter_pred_8(struct filter_pred *pred, void *event) +static int filter_pred_or(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) { - u8 *addr = (u8 *)(event + pred->offset); - u8 val = (u8)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; + return val1 || val2; } -static int filter_pred_string(struct filter_pred *pred, void *event) +static int filter_pred_string(struct filter_pred *pred, void *event, + int val1, int val2) { char *addr = (char *)(event + pred->offset); int cmp, match; @@ -85,7 +164,8 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } -static int filter_pred_none(struct filter_pred *pred, void *event) +static int filter_pred_none(struct filter_pred *pred, void *event, + int val1, int val2) { return 0; } @@ -94,66 +174,119 @@ static int filter_pred_none(struct filter_pred *pred, void *event) int filter_match_preds(struct ftrace_event_call *call, void *rec) { struct event_filter *filter = call->filter; - int i, matched, and_failed = 0; + int match, top = 0, val1 = 0, val2 = 0; + int stack[MAX_FILTER_PRED]; struct filter_pred *pred; + int i; for (i = 0; i < filter->n_preds; i++) { pred = filter->preds[i]; - if (and_failed && !pred->or) + if (!pred->pop_n) { + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; continue; - matched = pred->fn(pred, rec); - if (!matched && !pred->or) { - and_failed = 1; - continue; - } else if (matched && pred->or) - return 1; + } + if (pred->pop_n > top) { + WARN_ON_ONCE(1); + return 0; + } + val1 = stack[--top]; + val2 = stack[--top]; + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; } - if (and_failed) - return 0; - - return 1; + return stack[--top]; } EXPORT_SYMBOL_GPL(filter_match_preds); -static void __filter_print_preds(struct event_filter *filter, - struct trace_seq *s) +static void parse_error(struct filter_parse_state *ps, int err, int pos) { - struct filter_pred *pred; - char *field_name; - int i; + ps->lasterr = err; + ps->lasterr_pos = pos; +} - if (!filter || !filter->n_preds) { - trace_seq_printf(s, "none\n"); +static void remove_filter_string(struct event_filter *filter) +{ + kfree(filter->filter_string); + filter->filter_string = NULL; +} + +static int replace_filter_string(struct event_filter *filter, + char *filter_string) +{ + kfree(filter->filter_string); + filter->filter_string = kstrdup(filter_string, GFP_KERNEL); + if (!filter->filter_string) + return -ENOMEM; + + return 0; +} + +static int append_filter_string(struct event_filter *filter, + char *string) +{ + int newlen; + char *new_filter_string; + + BUG_ON(!filter->filter_string); + newlen = strlen(filter->filter_string) + strlen(string) + 1; + new_filter_string = kmalloc(newlen, GFP_KERNEL); + if (!new_filter_string) + return -ENOMEM; + + strcpy(new_filter_string, filter->filter_string); + strcat(new_filter_string, string); + kfree(filter->filter_string); + filter->filter_string = new_filter_string; + + return 0; +} + +static void append_filter_err(struct filter_parse_state *ps, + struct event_filter *filter) +{ + int pos = ps->lasterr_pos; + char *buf, *pbuf; + + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return; - } - for (i = 0; i < filter->n_preds; i++) { - pred = filter->preds[i]; - field_name = pred->field_name; - if (i) - trace_seq_printf(s, pred->or ? "|| " : "&& "); - trace_seq_printf(s, "%s ", field_name); - trace_seq_printf(s, pred->not ? "!= " : "== "); - if (pred->str_len) - trace_seq_printf(s, "%s\n", pred->str_val); - else - trace_seq_printf(s, "%llu\n", pred->val); - } + append_filter_string(filter, "\n"); + memset(buf, ' ', PAGE_SIZE); + if (pos > PAGE_SIZE - 128) + pos = 0; + buf[pos] = '^'; + pbuf = &buf[pos] + 1; + + sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]); + append_filter_string(filter, buf); + free_page((unsigned long) buf); } -void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s) +void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { + struct event_filter *filter = call->filter; + mutex_lock(&filter_mutex); - __filter_print_preds(call->filter, s); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_printf(s, "none\n"); mutex_unlock(&filter_mutex); } -void filter_print_subsystem_preds(struct event_subsystem *system, +void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s) { + struct event_filter *filter = system->filter; + mutex_lock(&filter_mutex); - __filter_print_preds(system->filter, s); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_printf(s, "none\n"); mutex_unlock(&filter_mutex); } @@ -170,7 +303,7 @@ find_event_field(struct ftrace_event_call *call, char *name) return NULL; } -void filter_free_pred(struct filter_pred *pred) +static void filter_free_pred(struct filter_pred *pred) { if (!pred) return; @@ -191,15 +324,17 @@ static int filter_set_pred(struct filter_pred *dest, filter_pred_fn_t fn) { *dest = *src; - dest->field_name = kstrdup(src->field_name, GFP_KERNEL); - if (!dest->field_name) - return -ENOMEM; + if (src->field_name) { + dest->field_name = kstrdup(src->field_name, GFP_KERNEL); + if (!dest->field_name) + return -ENOMEM; + } dest->fn = fn; return 0; } -static void __filter_disable_preds(struct ftrace_event_call *call) +static void filter_disable_preds(struct ftrace_event_call *call) { struct event_filter *filter = call->filter; int i; @@ -211,13 +346,6 @@ static void __filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void filter_disable_preds(struct ftrace_event_call *call) -{ - mutex_lock(&filter_mutex); - __filter_disable_preds(call); - mutex_unlock(&filter_mutex); -} - int init_preds(struct ftrace_event_call *call) { struct event_filter *filter; @@ -258,48 +386,43 @@ oom: } EXPORT_SYMBOL_GPL(init_preds); -static void __filter_free_subsystem_preds(struct event_subsystem *system) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct event_filter *filter = system->filter; struct ftrace_event_call *call; int i; - if (filter && filter->n_preds) { + if (filter->n_preds) { for (i = 0; i < filter->n_preds; i++) filter_free_pred(filter->preds[i]); kfree(filter->preds); - kfree(filter); - system->filter = NULL; + filter->preds = NULL; + filter->n_preds = 0; } list_for_each_entry(call, &ftrace_events, list) { if (!call->define_fields) continue; - if (!strcmp(call->system, system->name)) - __filter_disable_preds(call); + if (!strcmp(call->system, system->name)) { + filter_disable_preds(call); + remove_filter_string(call->filter); + } } } -void filter_free_subsystem_preds(struct event_subsystem *system) -{ - mutex_lock(&filter_mutex); - __filter_free_subsystem_preds(system); - mutex_unlock(&filter_mutex); -} - -static int filter_add_pred_fn(struct ftrace_event_call *call, +static int filter_add_pred_fn(struct filter_parse_state *ps, + struct ftrace_event_call *call, struct filter_pred *pred, filter_pred_fn_t fn) { struct event_filter *filter = call->filter; int idx, err; - if (filter->n_preds && !pred->compound) - __filter_disable_preds(call); - - if (filter->n_preds == MAX_FILTER_PRED) + if (filter->n_preds == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; + } idx = filter->n_preds; filter_clear_pred(filter->preds[idx]); @@ -321,94 +444,132 @@ static int is_string_field(const char *type) return 0; } -static int __filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred) +static int is_legal_op(struct ftrace_event_field *field, int op) +{ + if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) + return 0; + + return 1; +} + +static filter_pred_fn_t select_comparison_fn(int op, int field_size, + int field_is_signed) +{ + filter_pred_fn_t fn = NULL; + + switch (field_size) { + case 8: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_64; + else if (field_is_signed) + fn = filter_pred_s64; + else + fn = filter_pred_u64; + break; + case 4: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_32; + else if (field_is_signed) + fn = filter_pred_s32; + else + fn = filter_pred_u32; + break; + case 2: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_16; + else if (field_is_signed) + fn = filter_pred_s16; + else + fn = filter_pred_u16; + break; + case 1: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_8; + else if (field_is_signed) + fn = filter_pred_s8; + else + fn = filter_pred_u8; + break; + } + + return fn; +} + +static int filter_add_pred(struct filter_parse_state *ps, + struct ftrace_event_call *call, + struct filter_pred *pred) { struct ftrace_event_field *field; filter_pred_fn_t fn; unsigned long long val; + pred->fn = filter_pred_none; + + if (pred->op == OP_AND) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_and); + } else if (pred->op == OP_OR) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_or); + } + field = find_event_field(call, pred->field_name); - if (!field) + if (!field) { + parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); return -EINVAL; + } - pred->fn = filter_pred_none; pred->offset = field->offset; + if (!is_legal_op(field, pred->op)) { + parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0); + return -EINVAL; + } + if (is_string_field(field->type)) { fn = filter_pred_string; pred->str_len = field->size; - return filter_add_pred_fn(call, pred, fn); + if (pred->op == OP_NE) + pred->not = 1; + return filter_add_pred_fn(ps, call, pred, fn); } else { - if (strict_strtoull(pred->str_val, 0, &val)) + if (strict_strtoull(pred->str_val, 0, &val)) { + parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; + } pred->val = val; } - switch (field->size) { - case 8: - fn = filter_pred_64; - break; - case 4: - fn = filter_pred_32; - break; - case 2: - fn = filter_pred_16; - break; - case 1: - fn = filter_pred_8; - break; - default: + fn = select_comparison_fn(pred->op, field->size, field->is_signed); + if (!fn) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); return -EINVAL; } - return filter_add_pred_fn(call, pred, fn); -} - -int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) -{ - int err; - - mutex_lock(&filter_mutex); - err = __filter_add_pred(call, pred); - mutex_unlock(&filter_mutex); + if (pred->op == OP_NE) + pred->not = 1; - return err; + return filter_add_pred_fn(ps, call, pred, fn); } -int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred) +static int filter_add_subsystem_pred(struct filter_parse_state *ps, + struct event_subsystem *system, + struct filter_pred *pred, + char *filter_string) { struct event_filter *filter = system->filter; struct ftrace_event_call *call; - mutex_lock(&filter_mutex); - - if (filter && filter->n_preds && !pred->compound) { - __filter_free_subsystem_preds(system); - filter = NULL; - } - - if (!filter) { - system->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!system->filter) { - mutex_unlock(&filter_mutex); - return -ENOMEM; - } - filter = system->filter; + if (!filter->preds) { filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!filter->preds) { - kfree(system->filter); - system->filter = NULL; - mutex_unlock(&filter_mutex); + if (!filter->preds) return -ENOMEM; - } } if (filter->n_preds == MAX_FILTER_PRED) { - mutex_unlock(&filter_mutex); + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; } @@ -424,97 +585,508 @@ int filter_add_subsystem_pred(struct event_subsystem *system, if (strcmp(call->system, system->name)) continue; - err = __filter_add_pred(call, pred); - if (err == -ENOMEM) { - filter->preds[filter->n_preds] = NULL; - filter->n_preds--; - mutex_unlock(&filter_mutex); + err = filter_add_pred(ps, call, pred); + if (err) { + filter_free_subsystem_preds(system); + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); return err; } + replace_filter_string(call->filter, filter_string); } - mutex_unlock(&filter_mutex); + return 0; +} + +static void parse_init(struct filter_parse_state *ps, + struct filter_op *ops, + char *infix_string) +{ + memset(ps, '\0', sizeof(*ps)); + + ps->infix.string = infix_string; + ps->infix.cnt = strlen(infix_string); + ps->ops = ops; + + INIT_LIST_HEAD(&ps->opstack); + INIT_LIST_HEAD(&ps->postfix); +} + +static char infix_next(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + + return ps->infix.string[ps->infix.tail++]; +} + +static char infix_peek(struct filter_parse_state *ps) +{ + if (ps->infix.tail == strlen(ps->infix.string)) + return 0; + + return ps->infix.string[ps->infix.tail]; +} + +static void infix_advance(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + ps->infix.tail++; +} + +static inline int is_precedence_lower(struct filter_parse_state *ps, + int a, int b) +{ + return ps->ops[a].precedence < ps->ops[b].precedence; +} + +static inline int is_op_char(struct filter_parse_state *ps, char c) +{ + int i; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (ps->ops[i].string[0] == c) + return 1; + } return 0; } -/* - * The filter format can be - * - 0, which means remove all filter preds - * - [||/&&] ==/!= - */ -int filter_parse(char **pbuf, struct filter_pred *pred) -{ - char *tok, *val_str = NULL; - int tok_n = 0; - - while ((tok = strsep(pbuf, " \n"))) { - if (tok_n == 0) { - if (!strcmp(tok, "0")) { - pred->clear = 1; - return 0; - } else if (!strcmp(tok, "&&")) { - pred->or = 0; - pred->compound = 1; - } else if (!strcmp(tok, "||")) { - pred->or = 1; - pred->compound = 1; - } else - pred->field_name = tok; - tok_n = 1; - continue; +static int infix_get_op(struct filter_parse_state *ps, char firstc) +{ + char nextc = infix_peek(ps); + char opstr[3]; + int i; + + opstr[0] = firstc; + opstr[1] = nextc; + opstr[2] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) { + infix_advance(ps); + return ps->ops[i].id; } - if (tok_n == 1) { - if (!pred->field_name) - pred->field_name = tok; - else if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; + } + + opstr[1] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) + return ps->ops[i].id; + } + + return OP_NONE; +} + +static inline void clear_operand_string(struct filter_parse_state *ps) +{ + memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL); + ps->operand.tail = 0; +} + +static inline int append_operand_char(struct filter_parse_state *ps, char c) +{ + if (ps->operand.tail == MAX_FILTER_STR_VAL) + return -EINVAL; + + ps->operand.string[ps->operand.tail++] = c; + + return 0; +} + +static int filter_opstack_push(struct filter_parse_state *ps, int op) +{ + struct opstack_op *opstack_op; + + opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL); + if (!opstack_op) + return -ENOMEM; + + opstack_op->op = op; + list_add(&opstack_op->list, &ps->opstack); + + return 0; +} + +static int filter_opstack_empty(struct filter_parse_state *ps) +{ + return list_empty(&ps->opstack); +} + +static int filter_opstack_top(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + + return opstack_op->op; +} + +static int filter_opstack_pop(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + int op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + op = opstack_op->op; + list_del(&opstack_op->list); + + kfree(opstack_op); + + return op; +} + +static void filter_opstack_clear(struct filter_parse_state *ps) +{ + while (!filter_opstack_empty(ps)) + filter_opstack_pop(ps); +} + +static char *curr_operand(struct filter_parse_state *ps) +{ + return ps->operand.string; +} + +static int postfix_append_operand(struct filter_parse_state *ps, char *operand) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = OP_NONE; + elt->operand = kstrdup(operand, GFP_KERNEL); + if (!elt->operand) { + kfree(elt); + return -ENOMEM; + } + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static int postfix_append_op(struct filter_parse_state *ps, int op) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = op; + elt->operand = NULL; + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static void postfix_clear(struct filter_parse_state *ps) +{ + struct postfix_elt *elt; + + while (!list_empty(&ps->postfix)) { + elt = list_first_entry(&ps->postfix, struct postfix_elt, list); + kfree(elt->operand); + list_del(&elt->list); + } +} + +static int filter_parse(struct filter_parse_state *ps) +{ + int op, top_op; + char ch; + + while ((ch = infix_next(ps))) { + if (isspace(ch)) + continue; + + if (is_op_char(ps, ch)) { + op = infix_get_op(ps, ch); + if (op == OP_NONE) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); return -EINVAL; } - tok_n = 2; + + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_top(ps); + if (!is_precedence_lower(ps, top_op, op)) { + top_op = filter_opstack_pop(ps); + postfix_append_op(ps, top_op); + continue; + } + break; + } + + filter_opstack_push(ps, op); continue; } - if (tok_n == 2) { - if (pred->compound) { - if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; - return -EINVAL; - } - } else { - val_str = tok; - break; /* done */ + + if (ch == '(') { + filter_opstack_push(ps, OP_OPEN_PAREN); + continue; + } + + if (ch == ')') { + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + top_op = filter_opstack_pop(ps); + while (top_op != OP_NONE) { + if (top_op == OP_OPEN_PAREN) + break; + postfix_append_op(ps, top_op); + top_op = filter_opstack_pop(ps); + } + if (top_op == OP_NONE) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; } - tok_n = 3; continue; } - if (tok_n == 3) { - val_str = tok; - break; /* done */ + if (append_operand_char(ps, ch)) { + parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0); + return -EINVAL; + } + } + + if (strlen(curr_operand(ps))) + postfix_append_operand(ps, curr_operand(ps)); + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_pop(ps); + if (top_op == OP_NONE) + break; + if (top_op == OP_OPEN_PAREN) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; + } + postfix_append_op(ps, top_op); + } + + return 0; +} + +static struct filter_pred *create_pred(int op, char *operand1, char *operand2) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->field_name = kstrdup(operand1, GFP_KERNEL); + if (!pred->field_name) { + kfree(pred); + return NULL; + } + + strcpy(pred->str_val, operand2); + pred->str_len = strlen(operand2); + + pred->op = op; + + return pred; +} + +static struct filter_pred *create_logical_pred(int op) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->op = op; + + return pred; +} + +static int check_preds(struct filter_parse_state *ps) +{ + int n_normal_preds = 0, n_logical_preds = 0; + struct postfix_elt *elt; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) + continue; + + if (elt->op == OP_AND || elt->op == OP_OR) { + n_logical_preds++; + continue; } + n_normal_preds++; } - if (!val_str || !strlen(val_str) - || strlen(val_str) >= MAX_FILTER_STR_VAL) { - pred->field_name = NULL; + if (!n_normal_preds || n_logical_preds >= n_normal_preds) { + parse_error(ps, FILT_ERR_INVALID_FILTER, 0); return -EINVAL; } - strcpy(pred->str_val, val_str); - pred->str_len = strlen(val_str); + return 0; +} - pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); - if (!pred->field_name) - return -ENOMEM; +static int replace_preds(struct event_subsystem *system, + struct ftrace_event_call *call, + struct filter_parse_state *ps, + char *filter_string) +{ + char *operand1 = NULL, *operand2 = NULL; + struct filter_pred *pred; + struct postfix_elt *elt; + int err; + + err = check_preds(ps); + if (err) + return err; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) { + if (!operand1) + operand1 = elt->operand; + else if (!operand2) + operand2 = elt->operand; + else { + parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); + return -EINVAL; + } + continue; + } + + if (elt->op == OP_AND || elt->op == OP_OR) { + pred = create_logical_pred(elt->op); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, + pred, filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + continue; + } + + if (!operand1 || !operand2) { + parse_error(ps, FILT_ERR_MISSING_FIELD, 0); + return -EINVAL; + } + + pred = create_pred(elt->op, operand1, operand2); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, pred, + filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + } return 0; } +int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_disable_preds(call); + remove_filter_string(call->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + return -ENOMEM; + + filter_disable_preds(call); + replace_filter_string(call->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, call->filter); + goto out; + } + + err = replace_preds(NULL, call, ps, filter_string); + if (err) + append_filter_err(ps, call->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + + mutex_unlock(&filter_mutex); + + return err; +} + +int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_free_subsystem_preds(system); + remove_filter_string(system->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + return -ENOMEM; + + filter_free_subsystem_preds(system); + replace_filter_string(system->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, system->filter); + goto out; + } + + err = replace_preds(system, NULL, ps, filter_string); + if (err) + append_filter_err(ps, system->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + + mutex_unlock(&filter_mutex); + + return err; +} -- cgit v1.2.3-70-g09d2 From 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:46:59 +0200 Subject: perf_counter, x86: declare perf_max_counters only for CONFIG_PERF_COUNTERS This is only needed for CONFIG_PERF_COUNTERS enabled. [ Impact: cleanup ] Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1241002046-8832-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 98143288530..be10b3ffe32 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -512,12 +512,13 @@ struct perf_cpu_context { int recursion[4]; }; +#ifdef CONFIG_PERF_COUNTERS + /* * Set by architecture code: */ extern int perf_max_counters; -#ifdef CONFIG_PERF_COUNTERS extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); -- cgit v1.2.3-70-g09d2 From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:03 +0200 Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu This patch renames struct hw_perf_counter_ops into struct pmu. It introduces a structure to describe a cpu specific pmu (performance monitoring unit). It may contain ops and data. The new name of the structure fits better, is shorter, and thus better to handle. Where it was appropriate, names of function and variable have been changed too. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 25 +++++++------- arch/x86/kernel/cpu/perf_counter.c | 37 ++++++++++----------- include/linux/perf_counter.h | 9 +++-- kernel/perf_counter.c | 68 ++++++++++++++++++-------------------- 4 files changed, 66 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bd76d0fa2c3..d9bbe5efc64 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) return 0; } -static void power_perf_read(struct perf_counter *counter) +static void power_pmu_read(struct perf_counter *counter) { long val, delta, prev; @@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_perf_read(counter); + power_pmu_read(counter); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; } @@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) counter->oncpu = cpu; counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; if (is_software_counter(counter)) - counter->hw_ops->enable(counter); + counter->pmu->enable(counter); } /* @@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, * re-enable the PMU in order to get hw_perf_restore to do the * actual work of reconfiguring the PMU. */ -static int power_perf_enable(struct perf_counter *counter) +static int power_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; unsigned long flags; @@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter) /* * Remove a counter from the PMU. */ -static void power_perf_disable(struct perf_counter *counter) +static void power_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; long i; @@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_save(flags); pmudis = hw_perf_save_disable(); - power_perf_read(counter); + power_pmu_read(counter); cpuhw = &__get_cpu_var(cpu_hw_counters); for (i = 0; i < cpuhw->n_counters; ++i) { @@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_restore(flags); } -struct hw_perf_counter_ops power_perf_ops = { - .enable = power_perf_enable, - .disable = power_perf_disable, - .read = power_perf_read +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, }; /* Number of perf_counters counting hardware events */ @@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) } } -const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { unsigned long ev; struct perf_counter *ctrs[MAX_HWCOUNTERS]; @@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter) if (err) return ERR_PTR(err); - return &power_perf_ops; + return &power_pmu; } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ad663d5ad2d..95de980c74a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter, } static inline void -__pmc_generic_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int idx) +__x86_pmu_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_disable(counter, hwc, idx); @@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter, } static void -__pmc_generic_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +__x86_pmu_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_enable(counter, hwc, idx); @@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static int pmc_generic_enable(struct perf_counter *counter) +static int x86_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -667,7 +667,7 @@ try_generic: perf_counters_lapic_init(hwc->nmi); - __pmc_generic_disable(counter, hwc, idx); + __x86_pmu_disable(counter, hwc, idx); cpuc->counters[idx] = counter; /* @@ -676,7 +676,7 @@ try_generic: barrier(); __hw_perf_counter_set_period(counter, hwc, idx); - __pmc_generic_enable(counter, hwc, idx); + __x86_pmu_enable(counter, hwc, idx); return 0; } @@ -731,13 +731,13 @@ void perf_counter_print_debug(void) local_irq_enable(); } -static void pmc_generic_disable(struct perf_counter *counter) +static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __pmc_generic_disable(counter, hwc, idx); + __x86_pmu_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; @@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); if (counter->state == PERF_COUNTER_STATE_ACTIVE) - __pmc_generic_enable(counter, hwc, idx); + __x86_pmu_enable(counter, hwc, idx); } /* @@ -805,7 +805,7 @@ again: perf_save_and_restart(counter); if (perf_counter_overflow(counter, nmi, regs, 0)) - __pmc_generic_disable(counter, &counter->hw, bit); + __x86_pmu_disable(counter, &counter->hw, bit); } hw_perf_ack_status(ack); @@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void pmc_generic_read(struct perf_counter *counter) +static void x86_pmu_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } -static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .enable = pmc_generic_enable, - .disable = pmc_generic_disable, - .read = pmc_generic_read, +static const struct pmu pmu = { + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .read = x86_pmu_read, }; -const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { int err; @@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter) if (err) return ERR_PTR(err); - return &x86_perf_counter_ops; + return &pmu; } /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index be10b3ffe32..c3db52dc876 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,9 +334,9 @@ struct hw_perf_counter { struct perf_counter; /** - * struct hw_perf_counter_ops - performance counter hw ops + * struct pmu - generic performance monitoring unit */ -struct hw_perf_counter_ops { +struct pmu { int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); @@ -381,7 +381,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; - const struct hw_perf_counter_ops *hw_ops; + const struct pmu *pmu; enum perf_counter_active_state state; enum perf_counter_active_state prev_state; @@ -519,8 +519,7 @@ struct perf_cpu_context { */ extern int perf_max_counters; -extern const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter); +extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 09396098dd0..582108addef 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { return NULL; } @@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter, counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_stopped = ctx->time; - counter->hw_ops->disable(counter); + counter->pmu->disable(counter); counter->oncpu = -1; if (!is_software_counter(counter)) @@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter, */ smp_wmb(); - if (counter->hw_ops->enable(counter)) { + if (counter->pmu->enable(counter)) { counter->state = PERF_COUNTER_STATE_INACTIVE; counter->oncpu = -1; return -EAGAIN; @@ -1096,7 +1095,7 @@ static void __read(void *info) local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); - counter->hw_ops->read(counter); + counter->pmu->read(counter); update_counter_times(counter); local_irq_restore(flags); } @@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter, leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) - sub->hw_ops->read(sub); + sub->pmu->read(sub); group_entry.event = sub->hw_event.config; group_entry.counter = atomic64_read(&sub->count); @@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) struct pt_regs *regs; counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->hw_ops->read(counter); + counter->pmu->read(counter); regs = get_irq_regs(); /* @@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter) perf_swcounter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_generic = { +static const struct pmu perf_ops_generic = { .enable = perf_swcounter_enable, .disable = perf_swcounter_disable, .read = perf_swcounter_read, @@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) cpu_clock_perf_counter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_cpu_clock = { +static const struct pmu perf_ops_cpu_clock = { .enable = cpu_clock_perf_counter_enable, .disable = cpu_clock_perf_counter_disable, .read = cpu_clock_perf_counter_read, @@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) task_clock_perf_counter_update(counter, time); } -static const struct hw_perf_counter_ops perf_ops_task_clock = { +static const struct pmu perf_ops_task_clock = { .enable = task_clock_perf_counter_enable, .disable = task_clock_perf_counter_disable, .read = task_clock_perf_counter_read, @@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) cpu_migrations_perf_counter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { +static const struct pmu perf_ops_cpu_migrations = { .enable = cpu_migrations_perf_counter_enable, .disable = cpu_migrations_perf_counter_disable, .read = cpu_migrations_perf_counter_read, @@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter) ftrace_profile_disable(perf_event_id(&counter->hw_event)); } -static const struct hw_perf_counter_ops * -tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { int event_id = perf_event_id(&counter->hw_event); int ret; @@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter) return &perf_ops_generic; } #else -static const struct hw_perf_counter_ops * -tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { return NULL; } #endif -static const struct hw_perf_counter_ops * -sw_perf_counter_init(struct perf_counter *counter) +static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_hw_event *hw_event = &counter->hw_event; - const struct hw_perf_counter_ops *hw_ops = NULL; + const struct pmu *pmu = NULL; struct hw_perf_counter *hwc = &counter->hw; /* @@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter) */ switch (perf_event_id(&counter->hw_event)) { case PERF_COUNT_CPU_CLOCK: - hw_ops = &perf_ops_cpu_clock; + pmu = &perf_ops_cpu_clock; if (hw_event->irq_period && hw_event->irq_period < 10000) hw_event->irq_period = 10000; @@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter) * use the cpu_clock counter instead. */ if (counter->ctx->task) - hw_ops = &perf_ops_task_clock; + pmu = &perf_ops_task_clock; else - hw_ops = &perf_ops_cpu_clock; + pmu = &perf_ops_cpu_clock; if (hw_event->irq_period && hw_event->irq_period < 10000) hw_event->irq_period = 10000; @@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_PAGE_FAULTS_MIN: case PERF_COUNT_PAGE_FAULTS_MAJ: case PERF_COUNT_CONTEXT_SWITCHES: - hw_ops = &perf_ops_generic; + pmu = &perf_ops_generic; break; case PERF_COUNT_CPU_MIGRATIONS: if (!counter->hw_event.exclude_kernel) - hw_ops = &perf_ops_cpu_migrations; + pmu = &perf_ops_cpu_migrations; break; } - if (hw_ops) + if (pmu) hwc->irq_period = hw_event->irq_period; - return hw_ops; + return pmu; } /* @@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, struct perf_counter *group_leader, gfp_t gfpflags) { - const struct hw_perf_counter_ops *hw_ops; + const struct pmu *pmu; struct perf_counter *counter; long err; @@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; counter->group_leader = group_leader; - counter->hw_ops = NULL; + counter->pmu = NULL; counter->ctx = ctx; counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) counter->state = PERF_COUNTER_STATE_OFF; - hw_ops = NULL; + pmu = NULL; if (perf_event_raw(hw_event)) { - hw_ops = hw_perf_counter_init(counter); + pmu = hw_perf_counter_init(counter); goto done; } switch (perf_event_type(hw_event)) { case PERF_TYPE_HARDWARE: - hw_ops = hw_perf_counter_init(counter); + pmu = hw_perf_counter_init(counter); break; case PERF_TYPE_SOFTWARE: - hw_ops = sw_perf_counter_init(counter); + pmu = sw_perf_counter_init(counter); break; case PERF_TYPE_TRACEPOINT: - hw_ops = tp_perf_counter_init(counter); + pmu = tp_perf_counter_init(counter); break; } done: err = 0; - if (!hw_ops) + if (!pmu) err = -EINVAL; - else if (IS_ERR(hw_ops)) - err = PTR_ERR(hw_ops); + else if (IS_ERR(pmu)) + err = PTR_ERR(pmu); if (err) { kfree(counter); return ERR_PTR(err); } - counter->hw_ops = hw_ops; + counter->pmu = pmu; if (counter->hw_event.mmap) atomic_inc(&nr_mmap_tracking); -- cgit v1.2.3-70-g09d2 From 6f00cada07bb5da7f751929d3173494dcc5446cc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:17 +0200 Subject: perf_counter, x86: consistent use of type int for counter index The type of counter index is sometimes implemented as unsigned int. This patch changes this to have a consistent usage of int. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-21-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 8 ++++---- include/linux/perf_counter.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f7fd4a35515..d8beebeb270 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config) static inline void __pmc_fixed_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int __idx) + struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter, static inline void __x86_pmu_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int idx) + struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_disable(counter, hwc, idx); @@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, static inline void __pmc_fixed_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int __idx) + struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; @@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; - unsigned int idx = hwc->idx; + int idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c3db52dc876..41aed427005 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -318,7 +318,7 @@ struct hw_perf_counter { unsigned long config_base; unsigned long counter_base; int nmi; - unsigned int idx; + int idx; }; union { /* software */ atomic64_t count; -- cgit v1.2.3-70-g09d2 From b1fca26631f76a5e8b18435a43f5d82b8734da4b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab..93054fc3635 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v1.2.3-70-g09d2 From c654b8a9cba6002aad1c01919e4928a79a4a6dcf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Apr 2009 17:33:25 -0400 Subject: nfsd: support ext4 i_version ext4 supports a real NFSv4 change attribute, which is bumped whenever the ctime would be updated, including times when two updates arrive within a jiffy of each other. (Note that although ext4 has space for nanosecond-precision ctime, the real resolution is lower: it actually uses jiffies as the time-source.) This ensures clients will invalidate their caches when they need to. There is some fear that keeping the i_version up-to-date could have performance drawbacks, so for now it's turned on only by a mount option. We hope to do something better eventually. Signed-off-by: J. Bruce Fields Cc: Theodore Tso --- fs/nfsd/nfs3xdr.c | 1 + fs/nfsd/nfs4xdr.c | 63 ++++++++++++++++++++++++++++++---------------- include/linux/nfsd/nfsfh.h | 7 ++++++ include/linux/nfsd/xdr4.h | 17 ++++++++++--- 4 files changed, 63 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 17d0dd99720..01d4ec1c88e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp) err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &fhp->fh_post_attr); + fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) fhp->fh_post_saved = 0; else diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4a71fcd3f03..12d36a7361c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1490,13 +1490,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) memcpy(p, ptr, nbytes); \ p += XDR_QUADLEN(nbytes); \ }} while (0) -#define WRITECINFO(c) do { \ - *p++ = htonl(c.atomic); \ - *p++ = htonl(c.before_ctime_sec); \ - *p++ = htonl(c.before_ctime_nsec); \ - *p++ = htonl(c.after_ctime_sec); \ - *p++ = htonl(c.after_ctime_nsec); \ -} while (0) + +static void write32(__be32 **p, u32 n) +{ + *(*p)++ = n; +} + +static void write64(__be32 **p, u64 n) +{ + write32(p, (u32)(n >> 32)); + write32(p, (u32)n); +} + +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +{ + if (IS_I_VERSION(inode)) { + write64(p, inode->i_version); + } else { + write32(p, stat->ctime.tv_sec); + write32(p, stat->ctime.tv_nsec); + } +} + +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +{ + write32(p, c->atomic); + if (c->change_supported) { + write64(p, c->before_change); + write64(p, c->after_change); + } else { + write32(p, c->before_ctime_sec); + write32(p, c->before_ctime_nsec); + write32(p, c->after_ctime_sec); + write32(p, c->after_ctime_nsec); + } +} #define RESERVE_SPACE(nbytes) do { \ p = resp->p; \ @@ -1849,16 +1877,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - /* - * Note: This _must_ be consistent with the scheme for writing - * change_info, so any changes made here must be reflected there - * as well. (See xdr4.h:set_change_info() and the WRITECINFO() - * macro above.) - */ if ((buflen -= 8) < 0) goto out_resource; - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + write_change(&p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { if ((buflen -= 8) < 0) @@ -2364,7 +2385,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(32); - WRITECINFO(create->cr_cinfo); + write_cinfo(&p, &create->cr_cinfo); WRITE32(2); WRITE32(create->cr_bmval[0]); WRITE32(create->cr_bmval[1]); @@ -2475,7 +2496,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(link->li_cinfo); + write_cinfo(&p, &link->li_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2493,7 +2514,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfsd4_encode_stateid(resp, &open->op_stateid); RESERVE_SPACE(40); - WRITECINFO(open->op_cinfo); + write_cinfo(&p, &open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); WRITE32(open->op_bmval[0]); @@ -2771,7 +2792,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(remove->rm_cinfo); + write_cinfo(&p, &remove->rm_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2784,8 +2805,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(40); - WRITECINFO(rename->rn_sinfo); - WRITECINFO(rename->rn_tinfo); + write_cinfo(&p, &rename->rn_sinfo); + write_cinfo(&p, &rename->rn_tinfo); ADJUST_ARGS(); } return nfserr; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index afa19016c4a..8f641c90845 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -151,9 +151,15 @@ typedef struct svc_fh { __u64 fh_pre_size; /* size before operation */ struct timespec fh_pre_mtime; /* mtime before oper */ struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; /* Post-op attributes saved in fh_unlock */ struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ #endif /* CONFIG_NFSD_V3 */ } svc_fh; @@ -298,6 +304,7 @@ fill_pre_wcc(struct svc_fh *fhp) fhp->fh_pre_mtime = inode->i_mtime; fhp->fh_pre_ctime = inode->i_ctime; fhp->fh_pre_size = inode->i_size; + fhp->fh_pre_change = inode->i_version; fhp->fh_pre_saved = 1; } } diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index f80d6013fdc..d0f050f01ec 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -64,10 +64,13 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) struct nfsd4_change_info { u32 atomic; + bool change_supported; u32 before_ctime_sec; u32 before_ctime_nsec; + u64 before_change; u32 after_ctime_sec; u32 after_ctime_nsec; + u64 after_change; }; struct nfsd4_access { @@ -503,10 +506,16 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); cinfo->atomic = 1; - cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; - cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; - cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; - cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; + cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); + if (cinfo->change_supported) { + cinfo->before_change = fhp->fh_pre_change; + cinfo->after_change = fhp->fh_post_change; + } else { + cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; + cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; + cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; + cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; + } } int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); -- cgit v1.2.3-70-g09d2 From 3cef9ab266a932899e756f7e1ea7a988a97bf3b2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 23 Feb 2009 21:42:10 -0800 Subject: nfsd4: lookup up callback cred only once Lookup the callback cred once and then use it for all subsequent callbacks. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 26 ++++++++++++++++++++++++++ fs/nfsd/nfs4state.c | 4 ++++ include/linux/nfsd/state.h | 1 + 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 711c6282151..cc10ed35ac8 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -415,6 +415,22 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) (int)clp->cl_name.len, clp->cl_name.data, reason); } +static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb) +{ + struct auth_cred acred = { + .machine_cred = 1 + }; + + /* + * Note in the gss case this doesn't actually have to wait for a + * gss upcall (or any calls to the client); this just creates a + * non-uptodate cred which the rpc state machine will fill in with + * a refresh_upcall later. + */ + return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, + RPCAUTH_LOOKUP_NEW); +} + static int do_probe_callback(void *data) { struct nfs4_client *clp = data; @@ -423,9 +439,18 @@ static int do_probe_callback(void *data) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; + struct rpc_cred *cred; int status; + cred = lookup_cb_cred(cb); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } + cb->cb_cred = cred; + msg.rpc_cred = cb->cb_cred; status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT); +out: if (status) warn_no_callback_path(clp, status); else @@ -475,6 +500,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_argp = cbr, + .rpc_cred = clp->cl_callback.cb_cred }; int retries = 1; int status = 0; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7e1fcc3aade..b205c7d7bc6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -643,6 +643,10 @@ shutdown_callback_client(struct nfs4_client *clp) clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); } + if (clp->cl_callback.cb_cred) { + put_rpccred(clp->cl_callback.cb_cred); + clp->cl_callback.cb_cred = NULL; + } } static inline void diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 4d61c873fee..8d882a3eb4b 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -97,6 +97,7 @@ struct nfs4_callback { /* RPC client info */ atomic_t cb_set; /* successful CB_NULL call */ struct rpc_clnt * cb_client; + struct rpc_cred * cb_cred; }; /* Maximum number of slots per session. 128 is useful for long haul TCP */ -- cgit v1.2.3-70-g09d2 From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Apr 2009 13:45:05 +0100 Subject: SELinux: Don't flush inherited SIGKILL during execve() Don't flush inherited SIGKILL during execve() in SELinux's post cred commit hook. This isn't really a security problem: if the SIGKILL came before the credentials were changed, then we were right to receive it at the time, and should honour it; if it came after the creds were changed, then we definitely should honour it; and in any case, all that will happen is that the process will be scrapped before it ever returns to userspace. Signed-off-by: David Howells Signed-off-by: Oleg Nesterov Signed-off-by: James Morris --- include/linux/sched.h | 1 + kernel/signal.c | 11 ++++++++--- security/selinux/hooks.c | 9 +++++---- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1d19c025f9d..d3b787c7aef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); diff --git a/kernel/signal.c b/kernel/signal.c index 1c8814481a1..f93efec14ff 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -238,14 +238,19 @@ void flush_sigqueue(struct sigpending *queue) /* * Flush all pending signals for a task. */ +void __flush_signals(struct task_struct *t) +{ + clear_tsk_thread_flag(t, TIF_SIGPENDING); + flush_sigqueue(&t->pending); + flush_sigqueue(&t->signal->shared_pending); +} + void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); - clear_tsk_thread_flag(t, TIF_SIGPENDING); - flush_sigqueue(&t->pending); - flush_sigqueue(&t->signal->shared_pending); + __flush_signals(t); spin_unlock_irqrestore(&t->sighand->siglock, flags); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index dd19ba81201..5a345115036 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2394,11 +2394,12 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) memset(&itimer, 0, sizeof itimer); for (i = 0; i < 3; i++) do_setitimer(i, &itimer, NULL); - flush_signals(current); spin_lock_irq(¤t->sighand->siglock); - flush_signal_handlers(current, 1); - sigemptyset(¤t->blocked); - recalc_sigpending(); + if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { + __flush_signals(current); + flush_signal_handlers(current, 1); + sigemptyset(¤t->blocked); + } spin_unlock_irq(¤t->sighand->siglock); } -- cgit v1.2.3-70-g09d2 From 0821c71751ef88f4251d7206e76ce497ee267a2d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:02:59 +0000 Subject: ethtool: Add port type PORT_OTHER Add a PORT_OTHER to represent all other physical port types. Current NICs generally do not allow switching between multiple port types in software so specific types should not be needed. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 131b127b70f..5ccb6bd660c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -605,6 +605,7 @@ struct ethtool_ops { #define PORT_MII 0x02 #define PORT_FIBRE 0x03 #define PORT_BNC 0x04 +#define PORT_OTHER 0xff /* Which transceiver to use. */ #define XCVR_INTERNAL 0x00 -- cgit v1.2.3-70-g09d2 From 52c94dfae11d9ffd70b7bd003a36a4e210f2866a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:04:14 +0000 Subject: mdio: Add register definitions for MDIO (clause 45) IEEE 802.3 clause 45 specifies the MDIO interface and registers for use in 10G and other PHYs, similar to the MII management interface. PHYs may have up to 32 MMDs corresponding to different sub-layers and functions, each with up to 65536 registers. These are addressed by PRTAD (similar to the MII PHY address) and DEVAD. Define a mapping for specifying PRTAD and DEVAD through the existing MII ioctls. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mdio.h | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 include/linux/mdio.h (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h new file mode 100644 index 00000000000..d57ddb08a1b --- /dev/null +++ b/include/linux/mdio.h @@ -0,0 +1,237 @@ +/* + * linux/mdio.h: definitions for MDIO (clause 45) transceivers + * Copyright 2006-2009 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef __LINUX_MDIO_H__ +#define __LINUX_MDIO_H__ + +#include + +/* MDIO Manageable Devices (MMDs). */ +#define MDIO_MMD_PMAPMD 1 /* Physical Medium Attachment/ + * Physical Medium Dependent */ +#define MDIO_MMD_WIS 2 /* WAN Interface Sublayer */ +#define MDIO_MMD_PCS 3 /* Physical Coding Sublayer */ +#define MDIO_MMD_PHYXS 4 /* PHY Extender Sublayer */ +#define MDIO_MMD_DTEXS 5 /* DTE Extender Sublayer */ +#define MDIO_MMD_TC 6 /* Transmission Convergence */ +#define MDIO_MMD_AN 7 /* Auto-Negotiation */ +#define MDIO_MMD_C22EXT 29 /* Clause 22 extension */ +#define MDIO_MMD_VEND1 30 /* Vendor specific 1 */ +#define MDIO_MMD_VEND2 31 /* Vendor specific 2 */ + +/* Generic MDIO registers. */ +#define MDIO_CTRL1 MII_BMCR +#define MDIO_STAT1 MII_BMSR +#define MDIO_DEVID1 MII_PHYSID1 +#define MDIO_DEVID2 MII_PHYSID2 +#define MDIO_SPEED 4 /* Speed ability */ +#define MDIO_DEVS1 5 /* Devices in package */ +#define MDIO_DEVS2 6 +#define MDIO_CTRL2 7 /* 10G control 2 */ +#define MDIO_STAT2 8 /* 10G status 2 */ +#define MDIO_PMA_TXDIS 9 /* 10G PMA/PMD transmit disable */ +#define MDIO_PMA_RXDET 10 /* 10G PMA/PMD receive signal detect */ +#define MDIO_PMA_EXTABLE 11 /* 10G PMA/PMD extended ability */ +#define MDIO_PKGID1 14 /* Package identifier */ +#define MDIO_PKGID2 15 +#define MDIO_AN_ADVERTISE 16 /* AN advertising (base page) */ +#define MDIO_AN_LPA 19 /* AN LP abilities (base page) */ +#define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */ + +/* Media-dependent registers. */ +#define MDIO_PMA_10GBT_TXPWR 131 /* 10GBASE-T TX power control */ +#define MDIO_PCS_10GBX_STAT1 24 /* 10GBASE-X PCS status 1 */ +#define MDIO_PCS_10GBRT_STAT1 32 /* 10GBASE-R/-T PCS status 1 */ +#define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */ +#define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */ +#define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */ + +/* Control register 1. */ +/* Enable extended speed selection */ +#define MDIO_CTRL1_SPEEDSELEXT (BMCR_SPEED1000 | BMCR_SPEED100) +/* All speed selection bits */ +#define MDIO_CTRL1_SPEEDSEL (MDIO_CTRL1_SPEEDSELEXT | 0x003c) +#define MDIO_CTRL1_FULLDPLX BMCR_FULLDPLX +#define MDIO_CTRL1_LPOWER BMCR_PDOWN +#define MDIO_CTRL1_RESET BMCR_RESET +#define MDIO_PMA_CTRL1_LOOPBACK 0x0001 +#define MDIO_PMA_CTRL1_SPEED1000 BMCR_SPEED1000 +#define MDIO_PMA_CTRL1_SPEED100 BMCR_SPEED100 +#define MDIO_PCS_CTRL1_LOOPBACK BMCR_LOOPBACK +#define MDIO_PHYXS_CTRL1_LOOPBACK BMCR_LOOPBACK +#define MDIO_AN_CTRL1_RESTART BMCR_ANRESTART +#define MDIO_AN_CTRL1_ENABLE BMCR_ANENABLE +#define MDIO_AN_CTRL1_XNP 0x2000 /* Enable extended next page */ + +/* 10 Gb/s */ +#define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) +/* 10PASS-TS/2BASE-TL */ +#define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) + +/* Status register 1. */ +#define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ +#define MDIO_STAT1_LSTATUS BMSR_LSTATUS +#define MDIO_STAT1_FAULT 0x0080 /* Fault */ +#define MDIO_AN_STAT1_LPABLE 0x0001 /* Link partner AN ability */ +#define MDIO_AN_STAT1_ABLE BMSR_ANEGCAPABLE +#define MDIO_AN_STAT1_RFAULT BMSR_RFAULT +#define MDIO_AN_STAT1_COMPLETE BMSR_ANEGCOMPLETE +#define MDIO_AN_STAT1_PAGE 0x0040 /* Page received */ +#define MDIO_AN_STAT1_XNP 0x0080 /* Extended next page status */ + +/* Speed register. */ +#define MDIO_SPEED_10G 0x0001 /* 10G capable */ +#define MDIO_PMA_SPEED_2B 0x0002 /* 2BASE-TL capable */ +#define MDIO_PMA_SPEED_10P 0x0004 /* 10PASS-TS capable */ +#define MDIO_PMA_SPEED_1000 0x0010 /* 1000M capable */ +#define MDIO_PMA_SPEED_100 0x0020 /* 100M capable */ +#define MDIO_PMA_SPEED_10 0x0040 /* 10M capable */ +#define MDIO_PCS_SPEED_10P2B 0x0002 /* 10PASS-TS/2BASE-TL capable */ + +/* Device present registers. */ +#define MDIO_DEVS_PRESENT(devad) (1 << (devad)) +#define MDIO_DEVS_PMAPMD MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD) +#define MDIO_DEVS_WIS MDIO_DEVS_PRESENT(MDIO_MMD_WIS) +#define MDIO_DEVS_PCS MDIO_DEVS_PRESENT(MDIO_MMD_PCS) +#define MDIO_DEVS_PHYXS MDIO_DEVS_PRESENT(MDIO_MMD_PHYXS) +#define MDIO_DEVS_DTEXS MDIO_DEVS_PRESENT(MDIO_MMD_DTEXS) +#define MDIO_DEVS_TC MDIO_DEVS_PRESENT(MDIO_MMD_TC) +#define MDIO_DEVS_AN MDIO_DEVS_PRESENT(MDIO_MMD_AN) +#define MDIO_DEVS_C22EXT MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT) + +/* Control register 2. */ +#define MDIO_PMA_CTRL2_TYPE 0x000f /* PMA/PMD type selection */ +#define MDIO_PMA_CTRL2_10GBCX4 0x0000 /* 10GBASE-CX4 type */ +#define MDIO_PMA_CTRL2_10GBEW 0x0001 /* 10GBASE-EW type */ +#define MDIO_PMA_CTRL2_10GBLW 0x0002 /* 10GBASE-LW type */ +#define MDIO_PMA_CTRL2_10GBSW 0x0003 /* 10GBASE-SW type */ +#define MDIO_PMA_CTRL2_10GBLX4 0x0004 /* 10GBASE-LX4 type */ +#define MDIO_PMA_CTRL2_10GBER 0x0005 /* 10GBASE-ER type */ +#define MDIO_PMA_CTRL2_10GBLR 0x0006 /* 10GBASE-LR type */ +#define MDIO_PMA_CTRL2_10GBSR 0x0007 /* 10GBASE-SR type */ +#define MDIO_PMA_CTRL2_10GBLRM 0x0008 /* 10GBASE-LRM type */ +#define MDIO_PMA_CTRL2_10GBT 0x0009 /* 10GBASE-T type */ +#define MDIO_PMA_CTRL2_10GBKX4 0x000a /* 10GBASE-KX4 type */ +#define MDIO_PMA_CTRL2_10GBKR 0x000b /* 10GBASE-KR type */ +#define MDIO_PMA_CTRL2_1000BT 0x000c /* 1000BASE-T type */ +#define MDIO_PMA_CTRL2_1000BKX 0x000d /* 1000BASE-KX type */ +#define MDIO_PMA_CTRL2_100BTX 0x000e /* 100BASE-TX type */ +#define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ +#define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ +#define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ +#define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ +#define MDIO_PCS_CTRL2_10GBW 0x0002 /* 10GBASE-W type */ +#define MDIO_PCS_CTRL2_10GBT 0x0003 /* 10GBASE-T type */ + +/* Status register 2. */ +#define MDIO_STAT2_RXFAULT 0x0400 /* Receive fault */ +#define MDIO_STAT2_TXFAULT 0x0800 /* Transmit fault */ +#define MDIO_STAT2_DEVPRST 0xc000 /* Device present */ +#define MDIO_STAT2_DEVPRST_VAL 0x8000 /* Device present value */ +#define MDIO_PMA_STAT2_LBABLE 0x0001 /* PMA loopback ability */ +#define MDIO_PMA_STAT2_10GBEW 0x0002 /* 10GBASE-EW ability */ +#define MDIO_PMA_STAT2_10GBLW 0x0004 /* 10GBASE-LW ability */ +#define MDIO_PMA_STAT2_10GBSW 0x0008 /* 10GBASE-SW ability */ +#define MDIO_PMA_STAT2_10GBLX4 0x0010 /* 10GBASE-LX4 ability */ +#define MDIO_PMA_STAT2_10GBER 0x0020 /* 10GBASE-ER ability */ +#define MDIO_PMA_STAT2_10GBLR 0x0040 /* 10GBASE-LR ability */ +#define MDIO_PMA_STAT2_10GBSR 0x0080 /* 10GBASE-SR ability */ +#define MDIO_PMD_STAT2_TXDISAB 0x0100 /* PMD TX disable ability */ +#define MDIO_PMA_STAT2_EXTABLE 0x0200 /* Extended abilities */ +#define MDIO_PMA_STAT2_RXFLTABLE 0x1000 /* Receive fault ability */ +#define MDIO_PMA_STAT2_TXFLTABLE 0x2000 /* Transmit fault ability */ +#define MDIO_PCS_STAT2_10GBR 0x0001 /* 10GBASE-R capable */ +#define MDIO_PCS_STAT2_10GBX 0x0002 /* 10GBASE-X capable */ +#define MDIO_PCS_STAT2_10GBW 0x0004 /* 10GBASE-W capable */ +#define MDIO_PCS_STAT2_RXFLTABLE 0x1000 /* Receive fault ability */ +#define MDIO_PCS_STAT2_TXFLTABLE 0x2000 /* Transmit fault ability */ + +/* Transmit disable register. */ +#define MDIO_PMD_TXDIS_GLOBAL 0x0001 /* Global PMD TX disable */ +#define MDIO_PMD_TXDIS_0 0x0002 /* PMD TX disable 0 */ +#define MDIO_PMD_TXDIS_1 0x0004 /* PMD TX disable 1 */ +#define MDIO_PMD_TXDIS_2 0x0008 /* PMD TX disable 2 */ +#define MDIO_PMD_TXDIS_3 0x0010 /* PMD TX disable 3 */ + +/* Receive signal detect register. */ +#define MDIO_PMD_RXDET_GLOBAL 0x0001 /* Global PMD RX signal detect */ +#define MDIO_PMD_RXDET_0 0x0002 /* PMD RX signal detect 0 */ +#define MDIO_PMD_RXDET_1 0x0004 /* PMD RX signal detect 1 */ +#define MDIO_PMD_RXDET_2 0x0008 /* PMD RX signal detect 2 */ +#define MDIO_PMD_RXDET_3 0x0010 /* PMD RX signal detect 3 */ + +/* Extended abilities register. */ +#define MDIO_PMA_EXTABLE_10GCX4 0x0001 /* 10GBASE-CX4 ability */ +#define MDIO_PMA_EXTABLE_10GBLRM 0x0002 /* 10GBASE-LRM ability */ +#define MDIO_PMA_EXTABLE_10GBT 0x0004 /* 10GBASE-T ability */ +#define MDIO_PMA_EXTABLE_10GBKX4 0x0008 /* 10GBASE-KX4 ability */ +#define MDIO_PMA_EXTABLE_10GBKR 0x0010 /* 10GBASE-KR ability */ +#define MDIO_PMA_EXTABLE_1000BT 0x0020 /* 1000BASE-T ability */ +#define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ +#define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ +#define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ + +/* PHY XGXS lane state register. */ +#define MDIO_PHYXS_LNSTAT_SYNC0 0x0001 +#define MDIO_PHYXS_LNSTAT_SYNC1 0x0002 +#define MDIO_PHYXS_LNSTAT_SYNC2 0x0004 +#define MDIO_PHYXS_LNSTAT_SYNC3 0x0008 +#define MDIO_PHYXS_LNSTAT_ALIGN 0x1000 + +/* PMA 10GBASE-T TX power register. */ +#define MDIO_PMA_10GBT_TXPWR_SHORT 0x0001 /* Short-reach mode */ + +/* PCS 10GBASE-R/-T status register 1. */ +#define MDIO_PCS_10GBRT_STAT1_BLKLK 0x0001 /* Block lock attained */ + +/* PCS 10GBASE-R/-T status register 2. */ +#define MDIO_PCS_10GBRT_STAT2_ERR 0x00ff +#define MDIO_PCS_10GBRT_STAT2_BER 0x3f00 + +/* AN 10GBASE-T control register. */ +#define MDIO_AN_10GBT_CTRL_ADV10G 0x1000 /* Advertise 10GBASE-T */ + +/* AN 10GBASE-T status register. */ +#define MDIO_AN_10GBT_STAT_LPTRR 0x0200 /* LP training reset req. */ +#define MDIO_AN_10GBT_STAT_LPLTABLE 0x0400 /* LP loop timing ability */ +#define MDIO_AN_10GBT_STAT_LP10G 0x0800 /* LP is 10GBT capable */ +#define MDIO_AN_10GBT_STAT_REMOK 0x1000 /* Remote OK */ +#define MDIO_AN_10GBT_STAT_LOCOK 0x2000 /* Local OK */ +#define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */ +#define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */ + +/* Mapping between MDIO PRTAD/DEVAD and mii_ioctl_data::phy_id */ + +#define MDIO_PHY_ID_C45 0x8000 +#define MDIO_PHY_ID_PRTAD 0x03e0 +#define MDIO_PHY_ID_DEVAD 0x001f +#define MDIO_PHY_ID_C45_MASK \ + (MDIO_PHY_ID_C45 | MDIO_PHY_ID_PRTAD | MDIO_PHY_ID_DEVAD) + +static inline __u16 mdio_phy_id_c45(int prtad, int devad) +{ + return MDIO_PHY_ID_C45 | (prtad << 5) | devad; +} + +static inline bool mdio_phy_id_is_c45(int phy_id) +{ + return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK); +} + +static inline __u16 mdio_phy_id_prtad(int phy_id) +{ + return (phy_id & MDIO_PHY_ID_PRTAD) >> 5; +} + +static inline __u16 mdio_phy_id_devad(int phy_id) +{ + return phy_id & MDIO_PHY_ID_DEVAD; +} + +#endif /* __LINUX_MDIO_H__ */ -- cgit v1.2.3-70-g09d2 From 1b1c2e95103ce391c2ea39a9460968fcb73deb30 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:04:46 +0000 Subject: mdio: Add generic MDIO (clause 45) support functions These roughly mirror many of the MII library functions and are based on code from the sfc driver. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/Kconfig | 3 + drivers/net/Makefile | 1 + drivers/net/mdio.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mdio.h | 66 ++++++++++ 4 files changed, 429 insertions(+) create mode 100644 drivers/net/mdio.c (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index bc7eef12d95..2f81db51b45 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2443,6 +2443,9 @@ menuconfig NETDEV_10000 if NETDEV_10000 +config MDIO + tristate + config CHELSIO_T1 tristate "Chelsio 10Gb Ethernet support" depends on PCI diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 80420f6d079..dcd5f15dd9e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o # obj-$(CONFIG_MII) += mii.o +obj-$(CONFIG_MDIO) += mdio.o obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_SUNDANCE) += sundance.o diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c new file mode 100644 index 00000000000..0604a25748e --- /dev/null +++ b/drivers/net/mdio.c @@ -0,0 +1,359 @@ +/* + * mdio.c: Generic support for MDIO-compatible transceivers + * Copyright 2006-2009 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include + +/** + * mdio45_probe - probe for an MDIO (clause 45) device + * @mdio: MDIO interface + * @prtad: Expected PHY address + * + * This sets @prtad and @mmds in the MDIO interface if successful. + * Returns 0 on success, negative on error. + */ +int mdio45_probe(struct mdio_if_info *mdio, int prtad) +{ + int mmd, stat2, devs1, devs2; + + /* Assume PHY must have at least one of PMA/PMD, WIS, PCS, PHY + * XS or DTE XS; give up if none is present. */ + for (mmd = 1; mmd <= 5; mmd++) { + /* Is this MMD present? */ + stat2 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_STAT2); + if (stat2 < 0 || + (stat2 & MDIO_STAT2_DEVPRST) != MDIO_STAT2_DEVPRST_VAL) + continue; + + /* It should tell us about all the other MMDs */ + devs1 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_DEVS1); + devs2 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_DEVS2); + if (devs1 < 0 || devs2 < 0) + continue; + + mdio->prtad = prtad; + mdio->mmds = devs1 | (devs2 << 16); + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL(mdio45_probe); + +/** + * mdio_set_flag - set or clear flag in an MDIO register + * @mdio: MDIO interface + * @prtad: PHY address + * @devad: MMD address + * @addr: Register address + * @mask: Mask for flag (single bit set) + * @sense: New value of flag + * + * This debounces changes: it does not write the register if the flag + * already has the proper value. Returns 0 on success, negative on error. + */ +int mdio_set_flag(const struct mdio_if_info *mdio, + int prtad, int devad, u16 addr, int mask, + bool sense) +{ + int old_val = mdio->mdio_read(mdio->dev, prtad, devad, addr); + int new_val; + + if (old_val < 0) + return old_val; + if (sense) + new_val = old_val | mask; + else + new_val = old_val & ~mask; + if (old_val == new_val) + return 0; + return mdio->mdio_write(mdio->dev, prtad, devad, addr, new_val); +} +EXPORT_SYMBOL(mdio_set_flag); + +/** + * mdio_link_ok - is link status up/OK + * @mdio: MDIO interface + * @mmd_mask: Mask for MMDs to check + * + * Returns 1 if the PHY reports link status up/OK, 0 otherwise. + * @mmd_mask is normally @mdio->mmds, but if loopback is enabled + * the MMDs being bypassed should be excluded from the mask. + */ +int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmd_mask) +{ + int devad, reg; + + if (!mmd_mask) { + /* Use absence of XGMII faults in lieu of link state */ + reg = mdio->mdio_read(mdio->dev, mdio->prtad, + MDIO_MMD_PHYXS, MDIO_STAT2); + return reg >= 0 && !(reg & MDIO_STAT2_RXFAULT); + } + + for (devad = 0; mmd_mask; devad++) { + if (mmd_mask & (1 << devad)) { + mmd_mask &= ~(1 << devad); + + /* Read twice because link state is latched and a + * read moves the current state into the register */ + mdio->mdio_read(mdio->dev, mdio->prtad, + devad, MDIO_STAT1); + reg = mdio->mdio_read(mdio->dev, mdio->prtad, + devad, MDIO_STAT1); + if (reg < 0 || !(reg & MDIO_STAT1_LSTATUS)) + return false; + } + } + + return true; +} +EXPORT_SYMBOL(mdio45_links_ok); + +/** + * mdio45_nway_restart - restart auto-negotiation for this interface + * @mdio: MDIO interface + * + * Returns 0 on success, negative on error. + */ +int mdio45_nway_restart(const struct mdio_if_info *mdio) +{ + if (!(mdio->mmds & MDIO_DEVS_AN)) + return -EOPNOTSUPP; + + mdio_set_flag(mdio, mdio->prtad, MDIO_MMD_AN, MDIO_CTRL1, + MDIO_AN_CTRL1_RESTART, true); + return 0; +} +EXPORT_SYMBOL(mdio45_nway_restart); + +static u32 mdio45_get_an(const struct mdio_if_info *mdio, u16 addr) +{ + u32 result = 0; + int reg; + + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, addr); + if (reg & ADVERTISE_10HALF) + result |= ADVERTISED_10baseT_Half; + if (reg & ADVERTISE_10FULL) + result |= ADVERTISED_10baseT_Full; + if (reg & ADVERTISE_100HALF) + result |= ADVERTISED_100baseT_Half; + if (reg & ADVERTISE_100FULL) + result |= ADVERTISED_100baseT_Full; + return result; +} + +/** + * mdio45_ethtool_gset_npage - get settings for ETHTOOL_GSET + * @mdio: MDIO interface + * @ecmd: Ethtool request structure + * @npage_adv: Modes currently advertised on next pages + * @npage_lpa: Modes advertised by link partner on next pages + * + * Since the CSRs for auto-negotiation using next pages are not fully + * standardised, this function does not attempt to decode them. The + * caller must pass them in. + */ +void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, + struct ethtool_cmd *ecmd, + u32 npage_adv, u32 npage_lpa) +{ + int reg; + + ecmd->transceiver = XCVR_INTERNAL; + ecmd->phy_address = mdio->prtad; + + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_CTRL2); + switch (reg & MDIO_PMA_CTRL2_TYPE) { + case MDIO_PMA_CTRL2_10GBT: + case MDIO_PMA_CTRL2_1000BT: + case MDIO_PMA_CTRL2_100BTX: + case MDIO_PMA_CTRL2_10BT: + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP; + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_SPEED); + if (reg & MDIO_SPEED_10G) + ecmd->supported |= SUPPORTED_10000baseT_Full; + if (reg & MDIO_PMA_SPEED_1000) + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_1000baseT_Half); + if (reg & MDIO_PMA_SPEED_100) + ecmd->supported |= (SUPPORTED_100baseT_Full | + SUPPORTED_100baseT_Half); + if (reg & MDIO_PMA_SPEED_10) + ecmd->supported |= (SUPPORTED_10baseT_Full | + SUPPORTED_10baseT_Half); + ecmd->advertising = ADVERTISED_TP; + break; + + case MDIO_PMA_CTRL2_10GBCX4: + case MDIO_PMA_CTRL2_10GBKX4: + case MDIO_PMA_CTRL2_10GBKR: + case MDIO_PMA_CTRL2_1000BKX: + ecmd->port = PORT_OTHER; + ecmd->supported = 0; + ecmd->advertising = 0; + break; + + /* All the other defined modes are flavours of optical */ + default: + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = ADVERTISED_FIBRE; + break; + } + + if (mdio->mmds & MDIO_DEVS_AN) { + ecmd->supported |= SUPPORTED_Autoneg; + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, + MDIO_CTRL1); + if (reg & MDIO_AN_CTRL1_ENABLE) { + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->advertising |= + ADVERTISED_Autoneg | + mdio45_get_an(mdio, MDIO_AN_ADVERTISE) | + npage_adv; + } else { + ecmd->autoneg = AUTONEG_DISABLE; + } + } else { + ecmd->autoneg = AUTONEG_DISABLE; + } + + if (ecmd->autoneg) { + u32 modes = 0; + + /* If AN is complete and successful, report best common + * mode, otherwise report best advertised mode. */ + if (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, + MDIO_STAT1) & + MDIO_AN_STAT1_COMPLETE) + modes = (ecmd->advertising & + (mdio45_get_an(mdio, MDIO_AN_LPA) | + npage_lpa)); + if (modes == 0) + modes = ecmd->advertising; + + if (modes & ADVERTISED_10000baseT_Full) { + ecmd->speed = SPEED_10000; + ecmd->duplex = DUPLEX_FULL; + } else if (modes & (ADVERTISED_1000baseT_Full | + ADVERTISED_1000baseT_Half)) { + ecmd->speed = SPEED_1000; + ecmd->duplex = !!(modes & ADVERTISED_1000baseT_Full); + } else if (modes & (ADVERTISED_100baseT_Full | + ADVERTISED_100baseT_Half)) { + ecmd->speed = SPEED_100; + ecmd->duplex = !!(modes & ADVERTISED_100baseT_Full); + } else { + ecmd->speed = SPEED_10; + ecmd->duplex = !!(modes & ADVERTISED_10baseT_Full); + } + } else { + /* Report forced settings */ + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_CTRL1); + ecmd->speed = (((reg & MDIO_PMA_CTRL1_SPEED1000) ? 100 : 1) * + ((reg & MDIO_PMA_CTRL1_SPEED100) ? 100 : 10)); + ecmd->duplex = (reg & MDIO_CTRL1_FULLDPLX || + ecmd->speed == SPEED_10000); + } +} +EXPORT_SYMBOL(mdio45_ethtool_gset_npage); + +/** + * mdio_mii_ioctl - MII ioctl interface for MDIO (clause 22 or 45) PHYs + * @mdio: MDIO interface + * @mii_data: MII ioctl data structure + * @cmd: MII ioctl command + * + * Returns 0 on success, negative on error. + */ +int mdio_mii_ioctl(const struct mdio_if_info *mdio, + struct mii_ioctl_data *mii_data, int cmd) +{ + int prtad, devad; + u16 addr = mii_data->reg_num; + + /* Validate/convert cmd to one of SIOC{G,S}MIIREG */ + switch (cmd) { + case SIOCGMIIPHY: + if (mdio->prtad == MDIO_PRTAD_NONE) + return -EOPNOTSUPP; + mii_data->phy_id = mdio->prtad; + cmd = SIOCGMIIREG; + break; + case SIOCGMIIREG: + break; + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + return -EOPNOTSUPP; + } + + /* Validate/convert phy_id */ + if ((mdio->mode_support & MDIO_SUPPORTS_C45) && + mdio_phy_id_is_c45(mii_data->phy_id)) { + prtad = mdio_phy_id_prtad(mii_data->phy_id); + devad = mdio_phy_id_devad(mii_data->phy_id); + } else if ((mdio->mode_support & MDIO_SUPPORTS_C22) && + mii_data->phy_id < 0x20) { + prtad = mii_data->phy_id; + devad = MDIO_DEVAD_NONE; + addr &= 0x1f; + } else if ((mdio->mode_support & MDIO_EMULATE_C22) && + mdio->prtad != MDIO_PRTAD_NONE && + mii_data->phy_id == mdio->prtad) { + /* Remap commonly-used MII registers. */ + prtad = mdio->prtad; + switch (addr) { + case MII_BMCR: + case MII_BMSR: + case MII_PHYSID1: + case MII_PHYSID2: + devad = __ffs(mdio->mmds); + break; + case MII_ADVERTISE: + case MII_LPA: + if (!(mdio->mmds & MDIO_DEVS_AN)) + return -EINVAL; + devad = MDIO_MMD_AN; + if (addr == MII_ADVERTISE) + addr = MDIO_AN_ADVERTISE; + else + addr = MDIO_AN_LPA; + break; + default: + return -EINVAL; + } + } else { + return -EINVAL; + } + + if (cmd == SIOCGMIIREG) { + int rc = mdio->mdio_read(mdio->dev, prtad, devad, addr); + if (rc < 0) + return rc; + mii_data->val_out = rc; + return 0; + } else { + return mdio->mdio_write(mdio->dev, prtad, devad, addr, + mii_data->val_in); + } +} +EXPORT_SYMBOL(mdio_mii_ioctl); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index d57ddb08a1b..ba41537eaa8 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -234,4 +234,70 @@ static inline __u16 mdio_phy_id_devad(int phy_id) return phy_id & MDIO_PHY_ID_DEVAD; } +#ifdef __KERNEL__ + +/** + * struct mdio_if_info - Ethernet controller MDIO interface + * @prtad: PRTAD of the PHY (%MDIO_PRTAD_NONE if not present/unknown) + * @mmds: Mask of MMDs expected to be present in the PHY. This must be + * non-zero unless @prtad = %MDIO_PRTAD_NONE. + * @mode_support: MDIO modes supported. If %MDIO_SUPPORTS_C22 is set then + * MII register access will be passed through with @devad = + * %MDIO_DEVAD_NONE. If %MDIO_EMULATE_C22 is set then access to + * commonly used clause 22 registers will be translated into + * clause 45 registers. + * @dev: Net device structure + * @mdio_read: Register read function; returns value or negative error code + * @mdio_write: Register write function; returns 0 or negative error code + */ +struct mdio_if_info { + int prtad; + u32 __bitwise mmds; + unsigned mode_support; + + struct net_device *dev; + int (*mdio_read)(struct net_device *dev, int prtad, int devad, + u16 addr); + int (*mdio_write)(struct net_device *dev, int prtad, int devad, + u16 addr, u16 val); +}; + +#define MDIO_PRTAD_NONE (-1) +#define MDIO_DEVAD_NONE (-1) +#define MDIO_SUPPORTS_C22 1 +#define MDIO_SUPPORTS_C45 2 +#define MDIO_EMULATE_C22 4 + +struct ethtool_cmd; +struct ethtool_pauseparam; +extern int mdio45_probe(struct mdio_if_info *mdio, int prtad); +extern int mdio_set_flag(const struct mdio_if_info *mdio, + int prtad, int devad, u16 addr, int mask, + bool sense); +extern int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmds); +extern int mdio45_nway_restart(const struct mdio_if_info *mdio); +extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, + struct ethtool_cmd *ecmd, + u32 npage_adv, u32 npage_lpa); + +/** + * mdio45_ethtool_gset - get settings for ETHTOOL_GSET + * @mdio: MDIO interface + * @ecmd: Ethtool request structure + * + * Since the CSRs for auto-negotiation using next pages are not fully + * standardised, this function does not attempt to decode them. Use + * mdio45_ethtool_gset_npage() to specify advertisement bits from next + * pages. + */ +static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio, + struct ethtool_cmd *ecmd) +{ + mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0); +} + +extern int mdio_mii_ioctl(const struct mdio_if_info *mdio, + struct mii_ioctl_data *mii_data, int cmd); + +#endif /* __KERNEL__ */ #endif /* __LINUX_MDIO_H__ */ -- cgit v1.2.3-70-g09d2 From 44c22ee91b56d7cad3b48c439dd96aad2e910fbc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:15:05 +0000 Subject: mii: Simplify mii_resolve_flowctrl_fdx() This is a shorter and more comprehensible formulation of the conditions for each flow control mode. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mii.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index ad748588faf..14ecb2e114f 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -250,18 +250,12 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) { u8 cap = 0; - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + if (lcladv & rmtadv & ADVERTISE_PAUSE_CAP) { + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + } else if (lcladv & rmtadv & ADVERTISE_PAUSE_ASYM) { + if (lcladv & ADVERTISE_PAUSE_CAP) + cap = FLOW_CTRL_RX; + else if (rmtadv & ADVERTISE_PAUSE_CAP) cap = FLOW_CTRL_TX; } -- cgit v1.2.3-70-g09d2 From a8c30832b5b12e5d4e9d1c20cdac3cc2880e08b8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:19:03 +0000 Subject: mii: Add mii_advertise_flowctrl() This converts flow control capabilites to an advertising mask and can be useful in combination with mii_resolve_flowctrl_fdx(). Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mii.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 14ecb2e114f..359fba88027 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -239,6 +239,22 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, return 0; } +/** + * mii_advertise_flowctrl - get flow control advertisement flags + * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) + */ +static inline u16 mii_advertise_flowctrl(int cap) +{ + u16 adv = 0; + + if (cap & FLOW_CTRL_RX) + adv = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + if (cap & FLOW_CTRL_TX) + adv ^= ADVERTISE_PAUSE_ASYM; + + return adv; +} + /** * mii_resolve_flowctrl_fdx * @lcladv: value of MII ADVERTISE register -- cgit v1.2.3-70-g09d2 From af2a3eac2fe6a6d8e9fdf6927284b34466a7d808 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:19:36 +0000 Subject: mdio: Add mdio45_ethtool_spauseparam_an() This implements the ETHTOOL_SPAUSEPARAM operation for MDIO (clause 45) PHYs with auto-negotiation MMDs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/mdio.c | 30 ++++++++++++++++++++++++++++++ include/linux/mdio.h | 3 +++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index 0604a25748e..2fb0d16b61f 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -274,6 +274,36 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, } EXPORT_SYMBOL(mdio45_ethtool_gset_npage); +/** + * mdio45_ethtool_spauseparam_an - set auto-negotiated pause parameters + * @mdio: MDIO interface + * @ecmd: Ethtool request structure + * + * This function assumes that the PHY has an auto-negotiation MMD. It + * will enable and disable advertising of flow control as appropriate. + */ +void mdio45_ethtool_spauseparam_an(const struct mdio_if_info *mdio, + const struct ethtool_pauseparam *ecmd) +{ + int adv, old_adv; + + WARN_ON(!(mdio->mmds & MDIO_DEVS_AN)); + + old_adv = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, + MDIO_AN_ADVERTISE); + adv = old_adv & ~(ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); + if (ecmd->autoneg) + adv |= mii_advertise_flowctrl( + (ecmd->rx_pause ? FLOW_CTRL_RX : 0) | + (ecmd->tx_pause ? FLOW_CTRL_TX : 0)); + if (adv != old_adv) { + mdio->mdio_write(mdio->dev, mdio->prtad, MDIO_MMD_AN, + MDIO_AN_ADVERTISE, adv); + mdio45_nway_restart(mdio); + } +} +EXPORT_SYMBOL(mdio45_ethtool_spauseparam_an); + /** * mdio_mii_ioctl - MII ioctl interface for MDIO (clause 22 or 45) PHYs * @mdio: MDIO interface diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ba41537eaa8..5645c0f863d 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -279,6 +279,9 @@ extern int mdio45_nway_restart(const struct mdio_if_info *mdio); extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, struct ethtool_cmd *ecmd, u32 npage_adv, u32 npage_lpa); +extern void +mdio45_ethtool_spauseparam_an(const struct mdio_if_info *mdio, + const struct ethtool_pauseparam *ecmd); /** * mdio45_ethtool_gset - get settings for ETHTOOL_GSET -- cgit v1.2.3-70-g09d2 From 0c09c1a49cc7b819b33566a49d9901f7cfdd6889 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:21:53 +0000 Subject: ethtool/mdio: Report MDIO mode support and link partner advertising Add mdio_support and lp_advertising fields to ethtool_cmd. Set these in mdio45_ethtool_gset{,_npage}(). Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/mdio.c | 19 ++++++++++++------- include/linux/ethtool.h | 4 +++- include/linux/mdio.h | 5 +++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index 2fb0d16b61f..ee383c25693 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -173,6 +173,8 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, ecmd->transceiver = XCVR_INTERNAL; ecmd->phy_address = mdio->prtad; + ecmd->mdio_support = + mdio->mode_support & (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22); reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, MDIO_CTRL2); @@ -235,16 +237,19 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, if (ecmd->autoneg) { u32 modes = 0; + int an_stat = mdio->mdio_read(mdio->dev, mdio->prtad, + MDIO_MMD_AN, MDIO_STAT1); /* If AN is complete and successful, report best common * mode, otherwise report best advertised mode. */ - if (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, - MDIO_STAT1) & - MDIO_AN_STAT1_COMPLETE) - modes = (ecmd->advertising & - (mdio45_get_an(mdio, MDIO_AN_LPA) | - npage_lpa)); - if (modes == 0) + if (an_stat & MDIO_AN_STAT1_COMPLETE) { + ecmd->lp_advertising = + mdio45_get_an(mdio, MDIO_AN_LPA) | npage_lpa; + if (an_stat & MDIO_AN_STAT1_LPABLE) + ecmd->lp_advertising |= ADVERTISED_Autoneg; + modes = ecmd->advertising & ecmd->lp_advertising; + } + if ((modes & ~ADVERTISED_Autoneg) == 0) modes = ecmd->advertising; if (modes & ADVERTISED_10000baseT_Full) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 5ccb6bd660c..14e6bc86011 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -26,11 +26,13 @@ struct ethtool_cmd { __u8 phy_address; __u8 transceiver; /* Which transceiver to use */ __u8 autoneg; /* Enable or disable autonegotiation */ + __u8 mdio_support; __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ __u16 speed_hi; __u16 reserved2; - __u32 reserved[3]; + __u32 lp_advertising; /* Features the link partner advertises */ + __u32 reserved[2]; }; static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5645c0f863d..1bff2f2d0e1 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -234,6 +234,9 @@ static inline __u16 mdio_phy_id_devad(int phy_id) return phy_id & MDIO_PHY_ID_DEVAD; } +#define MDIO_SUPPORTS_C22 1 +#define MDIO_SUPPORTS_C45 2 + #ifdef __KERNEL__ /** @@ -264,8 +267,6 @@ struct mdio_if_info { #define MDIO_PRTAD_NONE (-1) #define MDIO_DEVAD_NONE (-1) -#define MDIO_SUPPORTS_C22 1 -#define MDIO_SUPPORTS_C45 2 #define MDIO_EMULATE_C22 4 struct ethtool_cmd; -- cgit v1.2.3-70-g09d2 From 894b19a6b343ce3589237167a56e6df0fe72ef0d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Apr 2009 08:25:57 +0000 Subject: ethtool/mdio: Support backplane mode negotiation Compile-tested only. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/mdio.c | 30 +++++++++++++++++++++++++----- include/linux/ethtool.h | 10 ++++++++++ include/linux/mdio.h | 5 +++++ 3 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index ee383c25693..66483035f68 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -202,12 +202,29 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, break; case MDIO_PMA_CTRL2_10GBCX4: + ecmd->port = PORT_OTHER; + ecmd->supported = 0; + ecmd->advertising = 0; + break; + case MDIO_PMA_CTRL2_10GBKX4: case MDIO_PMA_CTRL2_10GBKR: case MDIO_PMA_CTRL2_1000BKX: ecmd->port = PORT_OTHER; - ecmd->supported = 0; - ecmd->advertising = 0; + ecmd->supported = SUPPORTED_Backplane; + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_PMA_EXTABLE); + if (reg & MDIO_PMA_EXTABLE_10GBKX4) + ecmd->supported |= SUPPORTED_10000baseKX4_Full; + if (reg & MDIO_PMA_EXTABLE_10GBKR) + ecmd->supported |= SUPPORTED_10000baseKR_Full; + if (reg & MDIO_PMA_EXTABLE_1000BKX) + ecmd->supported |= SUPPORTED_1000baseKX_Full; + reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_PMA_10GBR_FECABLE); + if (reg & MDIO_PMA_10GBR_FECABLE_ABLE) + ecmd->supported |= SUPPORTED_10000baseR_FEC; + ecmd->advertising = ADVERTISED_Backplane; break; /* All the other defined modes are flavours of optical */ @@ -252,13 +269,16 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, if ((modes & ~ADVERTISED_Autoneg) == 0) modes = ecmd->advertising; - if (modes & ADVERTISED_10000baseT_Full) { + if (modes & (ADVERTISED_10000baseT_Full | + ADVERTISED_10000baseKX4_Full | + ADVERTISED_10000baseKR_Full)) { ecmd->speed = SPEED_10000; ecmd->duplex = DUPLEX_FULL; } else if (modes & (ADVERTISED_1000baseT_Full | - ADVERTISED_1000baseT_Half)) { + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseKX_Full)) { ecmd->speed = SPEED_1000; - ecmd->duplex = !!(modes & ADVERTISED_1000baseT_Full); + ecmd->duplex = !(modes & ADVERTISED_1000baseT_Half); } else if (modes & (ADVERTISED_100baseT_Full | ADVERTISED_100baseT_Half)) { ecmd->speed = SPEED_100; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 14e6bc86011..380b04272bf 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -565,6 +565,11 @@ struct ethtool_ops { #define SUPPORTED_Pause (1 << 13) #define SUPPORTED_Asym_Pause (1 << 14) #define SUPPORTED_2500baseX_Full (1 << 15) +#define SUPPORTED_Backplane (1 << 16) +#define SUPPORTED_1000baseKX_Full (1 << 17) +#define SUPPORTED_10000baseKX4_Full (1 << 18) +#define SUPPORTED_10000baseKR_Full (1 << 19) +#define SUPPORTED_10000baseR_FEC (1 << 20) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -583,6 +588,11 @@ struct ethtool_ops { #define ADVERTISED_Pause (1 << 13) #define ADVERTISED_Asym_Pause (1 << 14) #define ADVERTISED_2500baseX_Full (1 << 15) +#define ADVERTISED_Backplane (1 << 16) +#define ADVERTISED_1000baseKX_Full (1 << 17) +#define ADVERTISED_10000baseKX4_Full (1 << 18) +#define ADVERTISED_10000baseKR_Full (1 << 19) +#define ADVERTISED_10000baseR_FEC (1 << 20) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 1bff2f2d0e1..26b4eb3bbee 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -46,6 +46,7 @@ /* Media-dependent registers. */ #define MDIO_PMA_10GBT_TXPWR 131 /* 10GBASE-T TX power control */ +#define MDIO_PMA_10GBR_FECABLE 170 /* 10GBASE-R FEC ability */ #define MDIO_PCS_10GBX_STAT1 24 /* 10GBASE-X PCS status 1 */ #define MDIO_PCS_10GBRT_STAT1 32 /* 10GBASE-R/-T PCS status 1 */ #define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */ @@ -187,6 +188,10 @@ /* PMA 10GBASE-T TX power register. */ #define MDIO_PMA_10GBT_TXPWR_SHORT 0x0001 /* Short-reach mode */ +/* PMA 10GBASE-R FEC ability register. */ +#define MDIO_PMA_10GBR_FECABLE_ABLE 0x0001 /* FEC ability */ +#define MDIO_PMA_10GBR_FECABLE_ERRABLE 0x0002 /* FEC error indic. ability */ + /* PCS 10GBASE-R/-T status register 1. */ #define MDIO_PCS_10GBRT_STAT1_BLKLK 0x0001 /* Block lock attained */ -- cgit v1.2.3-70-g09d2 From a511e3f968c462a55ef58697257f5347c73d306e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 29 Apr 2009 15:59:58 -0700 Subject: mutex: add atomic_dec_and_mutex_lock(), fix include/linux/mutex.h:136: warning: 'mutex_lock' declared inline after being called include/linux/mutex.h:136: warning: previous declaration of 'mutex_lock' was here uninline it. [ Impact: clean up and uninline, address compiler warning ] Signed-off-by: Andrew Morton Cc: Al Viro Cc: Christoph Hellwig Cc: Eric Paris Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <200904292318.n3TNIsi6028340@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 24 +----------------------- kernel/mutex.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 93054fc3635..878cab4f5fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -150,28 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); - -/** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec - * @lock: the mutex to return holding if we dec to 0 - * - * return true and hold lock if we dec to 0, return false otherwise - */ -static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -{ - /* dec if we can't possibly hit 0 */ - if (atomic_add_unless(cnt, -1, 1)) - return 0; - /* we might hit 0, so take the lock */ - mutex_lock(lock); - if (!atomic_dec_and_test(cnt)) { - /* when we actually did the dec, we didn't hit 0 */ - mutex_unlock(lock); - return 0; - } - /* we hit 0, and we hold the lock */ - return 1; -} +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif diff --git a/kernel/mutex.c b/kernel/mutex.c index 507cf2b5e9f..e2d25e9e62a 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -471,5 +471,28 @@ int __sched mutex_trylock(struct mutex *lock) return ret; } - EXPORT_SYMBOL(mutex_trylock); + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); -- cgit v1.2.3-70-g09d2 From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 20 Apr 2009 22:22:22 -0700 Subject: futex: remove FUTEX_REQUEUE_PI (non CMP) The new requeue PI futex op codes were modeled after the existing FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls. I was unaware at the time that FUTEX_REQUEUE was only around for compatibility reasons and shouldn't be used in new code. Ulrich Drepper elaborates on this in his Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf. The deprecated call doesn't catch changes to the futex corresponding to the destination futex which can lead to deadlock. Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API. This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12 from 13. Since my test case is the only known user of this API, I felt this was the right thing to do, rather than leave a hole in the enumeration. I chose to continue using the _CMP_ modifier in the OP code to make it explicit to the user that the test is being done. Builds, boots, and ran several hundred iterations requeue_pi.c. Signed-off-by: Darren Hart LKML-Reference: <49ED580E.1050502@us.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 +--- kernel/futex.c | 6 +----- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index b05519ca9e5..34956c8fdeb 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -24,8 +24,7 @@ union ktime; #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 -#define FUTEX_REQUEUE_PI 12 -#define FUTEX_CMP_REQUEUE_PI 13 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -43,7 +42,6 @@ union ktime; #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) -#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) diff --git a/kernel/futex.c b/kernel/futex.c index 6d2daa46f9f..aec8bf89bf4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2555,9 +2555,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, clockrt, uaddr2); break; - case FUTEX_REQUEUE_PI: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); - break; case FUTEX_CMP_REQUEUE_PI: ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 1); @@ -2596,8 +2593,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || - cmd == FUTEX_WAKE_OP) + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -- cgit v1.2.3-70-g09d2 From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:06 +0000 Subject: signals: implement sys_rt_tgsigqueueinfo sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is missing a thread directed counterpart. Such an interface is important for migrating applications from other OSes which have the per thread delivery implemented. Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: Ulrich Drepper --- include/linux/compat.h | 2 ++ include/linux/signal.h | 2 ++ kernel/compat.c | 11 +++++++++++ kernel/signal.c | 26 ++++++++++++++++++++++++++ 4 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3..af931ee43dd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa5..c7552836bd9 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/compat.c b/kernel/compat.c index 42d56544460..f6c204f07ea 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, } +asmlinkage long +compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* compat_time_t is a 32 bit "long" and needs to get converted. */ diff --git a/kernel/signal.c b/kernel/signal.c index 56d27acad87..f79b3b9f837 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2369,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, return kill_proc_info(sig, &info, pid); } +long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) +{ + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info->si_code >= 0) + return -EPERM; + info->si_signo = sig; + + return do_send_specific(tgid, pid, sig, info); +} + +SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *t = current; -- cgit v1.2.3-70-g09d2 From c33a0bc4e41ef169d6e807d8abb9502544b518e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 May 2009 12:23:16 +0200 Subject: perf_counter: fix race in perf_output_* When two (or more) contexts output to the same buffer, it is possible to observe half written output. Suppose we have CPU0 doing perf_counter_mmap(), CPU1 doing perf_counter_overflow(). If CPU1 does a wakeup and exposes head to user-space, then CPU2 can observe the data CPU0 is still writing. [ Impact: fix occasionally corrupted profiling records ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090501102533.007821627@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +- kernel/perf_counter.c | 130 +++++++++++++++++++++++++++++++++---------- 2 files changed, 105 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 41aed427005..f776851f8c4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -358,10 +358,13 @@ struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ - atomic_t wakeup; /* POLL_ for wakeups */ + atomic_t poll; /* POLL_ for wakeups */ atomic_t head; /* write position */ atomic_t events; /* event limit */ + atomic_t wakeup_head; /* completed head */ + atomic_t lock; /* concurrent writes */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 75f2b6c8239..8660ae57953 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1279,14 +1279,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_counter *counter = file->private_data; struct perf_mmap_data *data; - unsigned int events; + unsigned int events = POLL_HUP; rcu_read_lock(); data = rcu_dereference(counter->data); if (data) - events = atomic_xchg(&data->wakeup, 0); - else - events = POLL_HUP; + events = atomic_xchg(&data->poll, 0); rcu_read_unlock(); poll_wait(file, &counter->waitq, wait); @@ -1568,22 +1566,6 @@ static const struct file_operations perf_fops = { void perf_counter_wakeup(struct perf_counter *counter) { - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) { - atomic_set(&data->wakeup, POLL_IN); - /* - * Ensure all data writes are issued before updating the - * user-space data head information. The matching rmb() - * will be in userspace after reading this value. - */ - smp_wmb(); - data->user_page->data_head = atomic_read(&data->head); - } - rcu_read_unlock(); - wake_up_all(&counter->waitq); if (counter->pending_kill) { @@ -1721,10 +1703,14 @@ struct perf_output_handle { int wakeup; int nmi; int overflow; + int locked; + unsigned long flags; }; -static inline void __perf_output_wakeup(struct perf_output_handle *handle) +static void perf_output_wakeup(struct perf_output_handle *handle) { + atomic_set(&handle->data->poll, POLL_IN); + if (handle->nmi) { handle->counter->pending_wakeup = 1; perf_pending_queue(&handle->counter->pending, @@ -1733,6 +1719,86 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle) perf_counter_wakeup(handle->counter); } +/* + * Curious locking construct. + * + * We need to ensure a later event doesn't publish a head when a former + * event isn't done writing. However since we need to deal with NMIs we + * cannot fully serialize things. + * + * What we do is serialize between CPUs so we only have to deal with NMI + * nesting on a single CPU. + * + * We only publish the head (and generate a wakeup) when the outer-most + * event completes. + */ +static void perf_output_lock(struct perf_output_handle *handle) +{ + struct perf_mmap_data *data = handle->data; + int cpu; + + handle->locked = 0; + + local_irq_save(handle->flags); + cpu = smp_processor_id(); + + if (in_nmi() && atomic_read(&data->lock) == cpu) + return; + + while (atomic_cmpxchg(&data->lock, 0, cpu) != 0) + cpu_relax(); + + handle->locked = 1; +} + +static void perf_output_unlock(struct perf_output_handle *handle) +{ + struct perf_mmap_data *data = handle->data; + int head, cpu; + + if (handle->wakeup) + data->wakeup_head = data->head; + + if (!handle->locked) + goto out; + +again: + /* + * The xchg implies a full barrier that ensures all writes are done + * before we publish the new head, matched by a rmb() in userspace when + * reading this position. + */ + while ((head = atomic_xchg(&data->wakeup_head, 0))) { + data->user_page->data_head = head; + handle->wakeup = 1; + } + + /* + * NMI can happen here, which means we can miss a wakeup_head update. + */ + + cpu = atomic_xchg(&data->lock, 0); + WARN_ON_ONCE(cpu != smp_processor_id()); + + /* + * Therefore we have to validate we did not indeed do so. + */ + if (unlikely(atomic_read(&data->wakeup_head))) { + /* + * Since we had it locked, we can lock it again. + */ + while (atomic_cmpxchg(&data->lock, 0, cpu) != 0) + cpu_relax(); + + goto again; + } + + if (handle->wakeup) + perf_output_wakeup(handle); +out: + local_irq_restore(handle->flags); +} + static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, int nmi, int overflow) @@ -1745,6 +1811,7 @@ static int perf_output_begin(struct perf_output_handle *handle, if (!data) goto out; + handle->data = data; handle->counter = counter; handle->nmi = nmi; handle->overflow = overflow; @@ -1752,12 +1819,13 @@ static int perf_output_begin(struct perf_output_handle *handle, if (!data->nr_pages) goto fail; + perf_output_lock(handle); + do { offset = head = atomic_read(&data->head); head += size; } while (atomic_cmpxchg(&data->head, offset, head) != offset); - handle->data = data; handle->offset = offset; handle->head = head; handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); @@ -1765,7 +1833,7 @@ static int perf_output_begin(struct perf_output_handle *handle, return 0; fail: - __perf_output_wakeup(handle); + perf_output_wakeup(handle); out: rcu_read_unlock(); @@ -1809,16 +1877,20 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle) { - int wakeup_events = handle->counter->hw_event.wakeup_events; + struct perf_counter *counter = handle->counter; + struct perf_mmap_data *data = handle->data; + + int wakeup_events = counter->hw_event.wakeup_events; if (handle->overflow && wakeup_events) { - int events = atomic_inc_return(&handle->data->events); + int events = atomic_inc_return(&data->events); if (events >= wakeup_events) { - atomic_sub(wakeup_events, &handle->data->events); - __perf_output_wakeup(handle); + atomic_sub(wakeup_events, &data->events); + handle->wakeup = 1; } - } else if (handle->wakeup) - __perf_output_wakeup(handle); + } + + perf_output_unlock(handle); rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 9ee1983c9aa18f12388ef660d0c76a23dc112959 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 30 Apr 2009 13:29:47 -0400 Subject: tracing: add irq tracepoint documentation Document irqs for the newly created docbook. [ Impact: add documentation ] Signed-off-by: Jason Baron Acked-by: Randy Dunlap Cc: akpm@linux-foundation.org Cc: rostedt@goodmis.org Cc: fweisbec@gmail.com Cc: mathieu.desnoyers@polymtl.ca Cc: wcohen@redhat.com LKML-Reference: <73ff42be3420157667ec548e9b0e409c3cfad05f.1241107197.git.jbaron@redhat.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/tracepoint.tmpl | 5 ++++ include/trace/events/irq.h | 46 ++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index 70891bc6849..b0756d0fd57 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -81,4 +81,9 @@ + + IRQ +!Iinclude/trace/events/irq.h + + diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 76868646751..32a9f7ef432 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,8 +7,16 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -/* - * Tracepoint for entry of interrupt handler: +/** + * irq_handler_entry - called immediately before the irq action handler + * @irq: irq number + * @action: pointer to struct irqaction + * + * The struct irqaction pointed to by @action contains various + * information about the handler, including the device name, + * @action->name, and the device id, @action->dev_id. When used in + * conjunction with the irq_handler_exit tracepoint, we can figure + * out irq handler latencies. */ TRACE_EVENT(irq_handler_entry, @@ -29,8 +37,16 @@ TRACE_EVENT(irq_handler_entry, TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) ); -/* - * Tracepoint for return of an interrupt handler: +/** + * irq_handler_exit - called immediately after the irq action handler returns + * @irq: irq number + * @action: pointer to struct irqaction + * @ret: return value + * + * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding + * @action->handler scuccessully handled this irq. Otherwise, the irq might be + * a shared irq line, or the irq was not handled successfully. Can be used in + * conjunction with the irq_handler_entry to understand irq handler latencies. */ TRACE_EVENT(irq_handler_exit, @@ -52,6 +68,17 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ TRACE_EVENT(softirq_entry, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -71,6 +98,17 @@ TRACE_EVENT(softirq_entry, TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) ); +/** + * softirq_exit - called immediately after the softirq handler returns + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the struct softirq_action + * that has handled the softirq. By subtracting the @vec pointer from + * the @h pointer, we can determine the softirq number. Also, when used in + * combination with the softirq_entry tracepoint we can determine the softirq + * latency. + */ TRACE_EVENT(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), -- cgit v1.2.3-70-g09d2 From 15e957d08dd4a841359cfec59ecb74041e0097aa Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Apr 2009 01:17:50 -0700 Subject: x86/irq: use move_irq_desc() in create_irq_nr() move_irq_desc() will try to move irq_desc to the home node if the allocated one is not correct, in create_irq_nr(). ( This can happen on devices that are on different nodes that are using MSI, when drivers are loaded and unloaded randomly. ) v2: fix non-smp build v3: add NUMA_IRQ_DESC to eliminate #ifdefs [ Impact: improve irq descriptor locality on NUMA systems ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F95EAE.2050903@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ arch/x86/kernel/apic/io_apic.c | 6 +----- include/linux/irq.h | 11 +++++++++-- kernel/irq/Makefile | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e1b2543f8ed..674e21e9f0a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,6 +274,10 @@ config SPARSE_IRQ If you don't know what to do here, say N. +config NUMA_IRQ_DESC + def_bool y + depends on SPARSE_IRQ && NUMA + config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9cd4806cdf5..e583291fe6c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3197,11 +3197,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (cfg_new->vector != 0) continue; -#ifdef CONFIG_NUMA_IRQ_DESC - /* different node ?*/ - if (desc_new->node != node) - desc = move_irq_desc(desc, node); -#endif + desc_new = move_irq_desc(desc_new, node); if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; diff --git a/include/linux/irq.h b/include/linux/irq.h index 4b95ddb5304..eedbb8e5e0c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -206,9 +206,16 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; -#else /* CONFIG_SPARSE_IRQ */ +#endif + +#ifdef CONFIG_NUMA_IRQ_DESC extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); -#endif /* CONFIG_SPARSE_IRQ */ +#else +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ + return desc; +} +#endif extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 2f065277f8e..7d047808419 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o -obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o +obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o obj-$(CONFIG_PM_SLEEP) += pm.o -- cgit v1.2.3-70-g09d2 From c237dc0303bcf6f4cc2e0efe4fe4e341c6f34dac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 29 Apr 2009 19:09:19 -0400 Subject: nfsd4: rename callback struct to cb_conn I want to use the name for a struct that actually does represent a single callback. (Actually, I've never been sure it helps to a separate struct for the callback information. Some day maybe those fields could just be dumped into struct nfs4_client. I don't know.) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 16 ++++++++-------- fs/nfsd/nfs4state.c | 22 +++++++++++----------- include/linux/nfsd/state.h | 4 ++-- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0aaf68beedb..ed860d7ddd1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -369,7 +369,7 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { struct sockaddr_in addr; - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), .to_retries = 0, @@ -422,7 +422,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) if (task->tk_status) warn_no_callback_path(clp, task->tk_status); else - atomic_set(&clp->cl_callback.cb_set, 1); + atomic_set(&clp->cl_cb_conn.cb_set, 1); put_nfs4_client(clp); } @@ -430,7 +430,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb) +static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) { struct auth_cred acred = { .machine_cred = 1 @@ -448,7 +448,7 @@ static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb) void do_probe_callback(struct nfs4_client *clp) { - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, @@ -480,7 +480,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) { int status; - BUG_ON(atomic_read(&clp->cl_callback.cb_set)); + BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); status = setup_callback_client(clp); if (status) { @@ -501,12 +501,12 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; struct nfs4_cb_recall *cbr = &dp->dl_recall; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_argp = cbr, - .rpc_cred = clp->cl_callback.cb_cred + .rpc_cred = clp->cl_cb_conn.cb_cred }; int retries = 1; int status = 0; @@ -519,7 +519,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) switch (status) { case -EIO: /* Network partition? */ - atomic_set(&clp->cl_callback.cb_set, 0); + atomic_set(&clp->cl_cb_conn.cb_set, 0); case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: /* Race: client probably got cb_recall diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b205c7d7bc6..d7b5e6b8920 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; + struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); if (fp->fi_had_conflict) @@ -633,19 +633,19 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static void shutdown_callback_client(struct nfs4_client *clp) { - struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; if (clnt) { /* * Callback threads take a reference on the client, so there * should be no outstanding callbacks at this point. */ - clp->cl_callback.cb_client = NULL; + clp->cl_cb_conn.cb_client = NULL; rpc_shutdown_client(clnt); } - if (clp->cl_callback.cb_cred) { - put_rpccred(clp->cl_callback.cb_cred); - clp->cl_callback.cb_cred = NULL; + if (clp->cl_cb_conn.cb_cred) { + put_rpccred(clp->cl_cb_conn.cb_cred); + clp->cl_cb_conn.cb_cred = NULL; } } @@ -719,7 +719,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) return NULL; memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_count, 1); - atomic_set(&clp->cl_callback.cb_set, 0); + atomic_set(&clp->cl_cb_conn.cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -971,7 +971,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; /* Currently, we only support tcp for the callback channel */ if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) @@ -1691,7 +1691,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, else { /* XXX: We just turn off callbacks until we can handle * change request correctly. */ - atomic_set(&conf->cl_callback.cb_set, 0); + atomic_set(&conf->cl_cb_conn.cb_set, 0); expire_client(unconf); status = nfs_ok; @@ -2425,7 +2425,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_callback *cb = &sop->so_client->cl_callback; + struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; struct file_lock fl, *flp = &fl; int status, flag = 0; @@ -2617,7 +2617,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_callback.cb_set)) + && !atomic_read(&clp->cl_cb_conn.cb_set)) goto out; status = nfs_ok; out: diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 8d882a3eb4b..563c367a301 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -88,7 +88,7 @@ struct nfs4_delegation { #define dl_fh dl_recall.cbr_fh /* client delegation callback info */ -struct nfs4_callback { +struct nfs4_cb_conn { /* SETCLIENTID info */ u32 cb_addr; unsigned short cb_port; @@ -186,7 +186,7 @@ struct nfs4_client { struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ - struct nfs4_callback cl_callback; /* callback info */ + struct nfs4_cb_conn cl_cb_conn; /* callback info */ atomic_t cl_count; /* ref count */ u32 cl_firststate; /* recovery dir creation */ -- cgit v1.2.3-70-g09d2 From b53d40c5070bffde1b2bcaf848412a50d8894794 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 1 May 2009 19:50:00 -0400 Subject: nfsd4: eliminate struct nfs4_cb_recall The nfs4_cb_recall struct is used only in nfs4_delegation, so its pointer to the containing delegation is unnecessary--we could just use container_of(). But there's no real reason to have this a separate struct at all--just move these fields to nfs4_delegation. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 24 +++++++++++------------- fs/nfsd/nfs4state.c | 5 ++--- include/linux/nfsd/state.h | 18 +++++------------- 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed860d7ddd1..2509305f6f5 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -215,18 +215,18 @@ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) } static int -encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp) { __be32 *p; - int len = cb_rec->cbr_fh.fh_size; + int len = dp->dl_fh.fh_size; - RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); + RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); WRITE32(OP_CB_RECALL); - WRITE32(cb_rec->cbr_stateid.si_generation); - WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); - WRITE32(cb_rec->cbr_trunc); + WRITE32(dp->dl_stateid.si_generation); + WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); + WRITE32(dp->dl_trunc); WRITE32(len); - WRITEMEM(&cb_rec->cbr_fh.fh_base, len); + WRITEMEM(&dp->dl_fh.fh_base, len); return 0; } @@ -241,11 +241,11 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) } static int -nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args) +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr = { - .ident = args->cbr_ident, + .ident = args->dl_ident, .nops = 1, }; @@ -502,17 +502,15 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; - struct nfs4_cb_recall *cbr = &dp->dl_recall; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_argp = cbr, + .rpc_argp = dp, .rpc_cred = clp->cl_cb_conn.cb_cred }; int retries = 1; int status = 0; - cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ - cbr->cbr_dp = dp; + dp->dl_trunc = 0; /* XXX need to implement truncate optimization */ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); while (retries--) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d7b5e6b8920..3e5345e01b1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -203,9 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f get_file(stp->st_vfs_file); dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; - dp->dl_recall.cbr_dp = NULL; - dp->dl_recall.cbr_ident = cb->cb_ident; - dp->dl_recall.cbr_trunc = 0; + dp->dl_ident = cb->cb_ident; + dp->dl_trunc = 0; dp->dl_stateid.si_boot = get_seconds(); dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 563c367a301..233b60d39b8 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -61,15 +61,6 @@ typedef struct { #define si_stateownerid si_opaque.so_stateownerid #define si_fileid si_opaque.so_fileid - -struct nfs4_cb_recall { - u32 cbr_ident; - int cbr_trunc; - stateid_t cbr_stateid; - struct knfsd_fh cbr_fh; - struct nfs4_delegation *cbr_dp; -}; - struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; @@ -81,12 +72,13 @@ struct nfs4_delegation { struct file *dl_vfs_file; u32 dl_type; time_t dl_time; - struct nfs4_cb_recall dl_recall; +/* For recall: */ + u32 dl_ident; + int dl_trunc; + stateid_t dl_stateid; + struct knfsd_fh dl_fh; }; -#define dl_stateid dl_recall.cbr_stateid -#define dl_fh dl_recall.cbr_fh - /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ -- cgit v1.2.3-70-g09d2 From 6707bd3d420f53ae8f090dac871843f6f43c9980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 1 May 2009 19:57:46 -0400 Subject: nfsd4: remove unused dl_trunc There's no point in keeping this field around--it's always zero. (Background: the protocol allows you to tell the client that the file is about to be truncated, as an optimization to save the client from writing back dirty pages that will just be discarded. We don't implement this hint. If we do some day, adding this field back in will be the least of the work involved.) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 4 +--- fs/nfsd/nfs4state.c | 1 - include/linux/nfsd/state.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2509305f6f5..0420b5e6e20 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -224,7 +224,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp) WRITE32(OP_CB_RECALL); WRITE32(dp->dl_stateid.si_generation); WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); - WRITE32(dp->dl_trunc); + WRITE32(0); /* truncate optimization not implemented */ WRITE32(len); WRITEMEM(&dp->dl_fh.fh_base, len); return 0; @@ -510,8 +510,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) int retries = 1; int status = 0; - dp->dl_trunc = 0; /* XXX need to implement truncate optimization */ - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); while (retries--) { switch (status) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3e5345e01b1..cbb16e191d5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -204,7 +204,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; dp->dl_ident = cb->cb_ident; - dp->dl_trunc = 0; dp->dl_stateid.si_boot = get_seconds(); dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 233b60d39b8..346b603072c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -74,7 +74,6 @@ struct nfs4_delegation { time_t dl_time; /* For recall: */ u32 dl_ident; - int dl_trunc; stateid_t dl_stateid; struct knfsd_fh dl_fh; }; -- cgit v1.2.3-70-g09d2 From 3aea09dc9106407d8bc18e593fbffda9ad632844 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 1 May 2009 20:11:12 -0400 Subject: nfsd4: track recall retries in nfs4_delegation Move this out of a local variable into the nfs4_delegation object in preparation for making this an async rpc call (at which point we'll need any state like this in a common object that's preserved across function calls). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 4 ++-- include/linux/nfsd/state.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0420b5e6e20..b88b207d75d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -507,11 +507,11 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) .rpc_argp = dp, .rpc_cred = clp->cl_cb_conn.cb_cred }; - int retries = 1; int status = 0; + dp->dl_retries = 1; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); - while (retries--) { + while (dp->dl_retries--) { switch (status) { case -EIO: /* Network partition? */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 346b603072c..c0c49215ddc 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -76,6 +76,7 @@ struct nfs4_delegation { u32 dl_ident; stateid_t dl_stateid; struct knfsd_fh dl_fh; + int dl_retries; }; /* client delegation callback info */ -- cgit v1.2.3-70-g09d2 From a25cbd045a2ffc42787d4dbcbb9c7118f5f42732 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 1 May 2009 14:45:46 +0900 Subject: clocksource: setup mult_orig in clocksource_enable() Setup clocksource mult_orig in clocksource_enable(). Clocksource drivers can save power by using keeping the device clock disabled while the clocksource is unused. In practice this means that the enable() and disable() callbacks perform clk_enable() and clk_disable(). The enable() callback may also use clk_get_rate() to get the clock rate from the clock framework. This information can then be used to calculate the shift and mult variables. Currently the mult_orig variable is setup from mult at registration time only. This is conflicting with the above case since the clock is disabled and the mult variable is not yet calculated at the time of registration. Moving the mult_orig setup code to clocksource_enable() allows us to both handle the common case with no enable() callback and the mult-changed-after-enable() case. [ Impact: allow dynamic clock source usage ] Signed-off-by: Magnus Damm LKML-Reference: <20090501054546.8193.10688.sendpatchset@rx1.opensource.se> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 10 +++++++++- kernel/time/clocksource.c | 3 --- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 5a40d14daa9..c56457c8334 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -288,7 +288,15 @@ static inline cycle_t clocksource_read(struct clocksource *cs) */ static inline int clocksource_enable(struct clocksource *cs) { - return cs->enable ? cs->enable(cs) : 0; + int ret = 0; + + if (cs->enable) + ret = cs->enable(cs); + + /* save mult_orig on enable */ + cs->mult_orig = cs->mult; + + return ret; } /** diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index ecfd7b5187e..80189f6f1c5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -402,9 +402,6 @@ int clocksource_register(struct clocksource *c) unsigned long flags; int ret; - /* save mult_orig on registration */ - c->mult_orig = c->mult; - spin_lock_irqsave(&clocksource_lock, flags); ret = clocksource_enqueue(c); if (!ret) -- cgit v1.2.3-70-g09d2 From 7d27558c4138ac6b3684dea35c2f4379b940a7dd Mon Sep 17 00:00:00 2001 From: john stultz Date: Fri, 1 May 2009 13:10:26 -0700 Subject: timekeeping: create arch_gettimeoffset infrastructure Some arches don't supply their own clocksource. This is mainly the case in architectures that get their inter-tick times by reading the counter on their interval timer. Since these timers wrap every tick, they're not really useful as clocksources. Wrapping them to act like one is possible but not very efficient. So we provide a callout these arches can implement for use with the jiffies clocksource to provide finer then tick granular time. [ Impact: ease the migration to generic time keeping ] Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/time.h | 15 +++++++++++++++ kernel/time/timekeeping.c | 7 +++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 242f62499bb..ea16c1a01d5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -113,6 +113,21 @@ struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) +/* Some architectures do not supply their own clocksource. + * This is mainly the case in architectures that get their + * inter-tick times by reading the counter on their interval + * timer. Since these timers wrap every tick, they're not really + * useful as clocksources. Wrapping them to act like one is possible + * but not very efficient. So we provide a callout these arches + * can implement for use with the jiffies clocksource to provide + * finer then tick granular time. + */ +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET +extern u32 arch_gettimeoffset(void); +#else +static inline u32 arch_gettimeoffset(void) { return 0; } +#endif + extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 687dff49f6e..e97c50f8458 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -77,6 +77,10 @@ static void clocksource_forward_now(void) clock->cycle_last = cycle_now; nsec = cyc2ns(clock, cycle_delta); + + /* If arch requires, add in gettimeoffset() */ + nsec += arch_gettimeoffset(); + timespec_add_ns(&xtime, nsec); nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; @@ -111,6 +115,9 @@ void getnstimeofday(struct timespec *ts) /* convert to nanoseconds: */ nsecs = cyc2ns(clock, cycle_delta); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + } while (read_seqretry(&xtime_lock, seq)); timespec_add_ns(ts, nsecs); -- cgit v1.2.3-70-g09d2 From 33f503c96c976fd585dedb76514ca6cb286e60d9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 2 May 2009 12:24:55 +0100 Subject: ASoC: Use a shared define for AC97 CODEC data formats The AC97 wire format is completely fixed so CODECs don't have any choice about the formats they accept but controllers accept a variety of data formats and render them down onto the bus. Have a shared define so all the CODEC drivers will interoperate with any of our controller drivers. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 +++ sound/soc/codecs/ac97.c | 4 ++-- sound/soc/codecs/ad1980.c | 4 ++-- sound/soc/codecs/wm9705.c | 4 ++-- sound/soc/codecs/wm9712.c | 6 +++--- sound/soc/codecs/wm9713.c | 6 +++--- 6 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 22b729fbbf8..ea07b4bd516 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -96,6 +96,9 @@ struct snd_pcm_substream; #define SND_SOC_CLOCK_IN 0 #define SND_SOC_CLOCK_OUT 1 +#define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S32_LE) + struct snd_soc_dai_ops; struct snd_soc_dai; struct snd_ac97_bus_ops; diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index b0d4af145b8..932299bb5d1 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -53,13 +53,13 @@ struct snd_soc_dai ac97_dai = { .channels_min = 1, .channels_max = 2, .rates = STD_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .capture = { .stream_name = "AC97 Capture", .channels_min = 1, .channels_max = 2, .rates = STD_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .ops = &ac97_dai_ops, }; EXPORT_SYMBOL_GPL(ac97_dai); diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c index ddb3b08ac23..d7440a982d2 100644 --- a/sound/soc/codecs/ad1980.c +++ b/sound/soc/codecs/ad1980.c @@ -137,13 +137,13 @@ struct snd_soc_dai ad1980_dai = { .channels_min = 2, .channels_max = 6, .rates = SNDRV_PCM_RATE_48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE, }, + .formats = SND_SOC_STD_AC97_FMTS, }, .capture = { .stream_name = "Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE, }, + .formats = SND_SOC_STD_AC97_FMTS, }, }; EXPORT_SYMBOL_GPL(ad1980_dai); diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c index c2d1a7a18fa..fa88b463e71 100644 --- a/sound/soc/codecs/wm9705.c +++ b/sound/soc/codecs/wm9705.c @@ -282,14 +282,14 @@ struct snd_soc_dai wm9705_dai[] = { .channels_min = 1, .channels_max = 2, .rates = WM9705_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE, + .formats = SND_SOC_STD_AC97_FMTS, }, .capture = { .stream_name = "HiFi Capture", .channels_min = 1, .channels_max = 2, .rates = WM9705_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE, + .formats = SND_SOC_STD_AC97_FMTS, }, .ops = &wm9705_dai_ops, }, diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 765cf1e7369..550c903f23b 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -534,13 +534,13 @@ struct snd_soc_dai wm9712_dai[] = { .channels_min = 1, .channels_max = 2, .rates = WM9712_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .capture = { .stream_name = "HiFi Capture", .channels_min = 1, .channels_max = 2, .rates = WM9712_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .ops = &wm9712_dai_ops_hifi, }, { @@ -550,7 +550,7 @@ struct snd_soc_dai wm9712_dai[] = { .channels_min = 1, .channels_max = 1, .rates = WM9712_AC97_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .ops = &wm9712_dai_ops_aux, } }; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index a6feb784231..d1744e96f30 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1040,13 +1040,13 @@ struct snd_soc_dai wm9713_dai[] = { .channels_min = 1, .channels_max = 2, .rates = WM9713_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .capture = { .stream_name = "HiFi Capture", .channels_min = 1, .channels_max = 2, .rates = WM9713_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .ops = &wm9713_dai_ops_hifi, }, { @@ -1056,7 +1056,7 @@ struct snd_soc_dai wm9713_dai[] = { .channels_min = 1, .channels_max = 1, .rates = WM9713_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .formats = SND_SOC_STD_AC97_FMTS,}, .ops = &wm9713_dai_ops_aux, }, { -- cgit v1.2.3-70-g09d2 From 4072604b9dd18f25a98cc0f4d3d4553ed1ad4152 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 2 May 2009 12:28:25 +0100 Subject: ASoC: Remove unused DAI format defines The defines for TDM and synchronous clocks are not used - they are mostly a legacy of the automatic clocking configuration. TDM will require configuration of the number of timeslots and which ones to use so can't be fit into the DAI format and synchronous mode is handled by symmetric_rates (and needs to be done by constraints rather than when the DAI format is being configured). Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index ea07b4bd516..a997c2cac63 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -44,24 +44,6 @@ struct snd_pcm_substream; #define SND_SOC_DAIFMT_CONT (0 << 4) /* continuous clock */ #define SND_SOC_DAIFMT_GATED (1 << 4) /* clock is gated */ -/* - * DAI Left/Right Clocks. - * - * Specifies whether the DAI can support different samples for similtanious - * playback and capture. This usually requires a seperate physical frame - * clock for playback and capture. - */ -#define SND_SOC_DAIFMT_SYNC (0 << 5) /* Tx FRM = Rx FRM */ -#define SND_SOC_DAIFMT_ASYNC (1 << 5) /* Tx FRM ~ Rx FRM */ - -/* - * TDM - * - * Time Division Multiplexing. Allows PCM data to be multplexed with other - * data on the DAI. - */ -#define SND_SOC_DAIFMT_TDM (1 << 6) - /* * DAI hardware signal inversions. * -- cgit v1.2.3-70-g09d2 From d5ed4c2e5ce9f5f6fd6a5a39ee1196a1f8a46eed Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Apr 2009 07:02:49 +0000 Subject: clocksource: SuperH MTU2 Timer driver This patch adds a MTU2 driver for the SuperH architecture. The MTU2 driver is a platform driver with early platform support to allow using a MTU2 channel as only clockevent during system bootup. Clocksource on sh2a is currently unsupported due to code generation issues with 64-bit math, so at this point only periodic clockevent support is in place. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 11 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/sh_mtu2.c | 357 ++++++++++++++++++++++++++++++++++++++++++ include/linux/sh_mtu2.h | 12 ++ 4 files changed, 381 insertions(+) create mode 100644 drivers/clocksource/sh_mtu2.c create mode 100644 include/linux/sh_mtu2.h (limited to 'include') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 6f91478826d..6d0dd378eca 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -113,6 +113,9 @@ config SYS_SUPPORTS_PCI config SYS_SUPPORTS_CMT bool +config SYS_SUPPORTS_MTU2 + bool + config STACKTRACE_SUPPORT def_bool y @@ -478,6 +481,14 @@ config SH_MTU2 help This enables the use of the MTU2 as the system timer. +config SH_TIMER_MTU2 + bool "MTU2 timer driver" + depends on SYS_SUPPORTS_MTU2 && !SH_MTU2 + default y + select GENERIC_CLOCKEVENTS + help + This enables build of the MTU2 timer driver. + config SH_TIMER_IRQ int default "28" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 1efb2879a94..9785586dc8c 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o +obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c new file mode 100644 index 00000000000..420566f4c50 --- /dev/null +++ b/drivers/clocksource/sh_mtu2.c @@ -0,0 +1,357 @@ +/* + * SuperH Timer Support - MTU2 + * + * Copyright (C) 2009 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct sh_mtu2_priv { + void __iomem *mapbase; + struct clk *clk; + struct irqaction irqaction; + struct platform_device *pdev; + unsigned long rate; + unsigned long periodic; + struct clock_event_device ced; +}; + +static DEFINE_SPINLOCK(sh_mtu2_lock); + +#define TSTR -1 /* shared register */ +#define TCR 0 /* channel register */ +#define TMDR 1 /* channel register */ +#define TIOR 2 /* channel register */ +#define TIER 3 /* channel register */ +#define TSR 4 /* channel register */ +#define TCNT 5 /* channel register */ +#define TGR 6 /* channel register */ + +static unsigned long mtu2_reg_offs[] = { + [TCR] = 0, + [TMDR] = 1, + [TIOR] = 2, + [TIER] = 4, + [TSR] = 5, + [TCNT] = 6, + [TGR] = 8, +}; + +static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr) +{ + struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs; + + if (reg_nr == TSTR) + return ioread8(base + cfg->channel_offset); + + offs = mtu2_reg_offs[reg_nr]; + + if ((reg_nr == TCNT) || (reg_nr == TGR)) + return ioread16(base + offs); + else + return ioread8(base + offs); +} + +static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr, + unsigned long value) +{ + struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs; + + if (reg_nr == TSTR) { + iowrite8(value, base + cfg->channel_offset); + return; + } + + offs = mtu2_reg_offs[reg_nr]; + + if ((reg_nr == TCNT) || (reg_nr == TGR)) + iowrite16(value, base + offs); + else + iowrite8(value, base + offs); +} + +static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) +{ + struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + unsigned long flags, value; + + /* start stop register shared by multiple timer channels */ + spin_lock_irqsave(&sh_mtu2_lock, flags); + value = sh_mtu2_read(p, TSTR); + + if (start) + value |= 1 << cfg->timer_bit; + else + value &= ~(1 << cfg->timer_bit); + + sh_mtu2_write(p, TSTR, value); + spin_unlock_irqrestore(&sh_mtu2_lock, flags); +} + +static int sh_mtu2_enable(struct sh_mtu2_priv *p) +{ + struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + int ret; + + /* enable clock */ + ret = clk_enable(p->clk); + if (ret) { + pr_err("sh_mtu2: cannot enable clock \"%s\"\n", cfg->clk); + return ret; + } + + /* make sure channel is disabled */ + sh_mtu2_start_stop_ch(p, 0); + + p->rate = clk_get_rate(p->clk) / 64; + p->periodic = (p->rate + HZ/2) / HZ; + + /* "Periodic Counter Operation" */ + sh_mtu2_write(p, TCR, 0x23); /* TGRA clear, divide clock by 64 */ + sh_mtu2_write(p, TIOR, 0); + sh_mtu2_write(p, TGR, p->periodic); + sh_mtu2_write(p, TCNT, 0); + sh_mtu2_write(p, TMDR, 0); + sh_mtu2_write(p, TIER, 0x01); + + /* enable channel */ + sh_mtu2_start_stop_ch(p, 1); + + return 0; +} + +static void sh_mtu2_disable(struct sh_mtu2_priv *p) +{ + /* disable channel */ + sh_mtu2_start_stop_ch(p, 0); + + /* stop clock */ + clk_disable(p->clk); +} + +static irqreturn_t sh_mtu2_interrupt(int irq, void *dev_id) +{ + struct sh_mtu2_priv *p = dev_id; + + /* acknowledge interrupt */ + sh_mtu2_read(p, TSR); + sh_mtu2_write(p, TSR, 0xfe); + + /* notify clockevent layer */ + p->ced.event_handler(&p->ced); + return IRQ_HANDLED; +} + +static struct sh_mtu2_priv *ced_to_sh_mtu2(struct clock_event_device *ced) +{ + return container_of(ced, struct sh_mtu2_priv, ced); +} + +static void sh_mtu2_clock_event_mode(enum clock_event_mode mode, + struct clock_event_device *ced) +{ + struct sh_mtu2_priv *p = ced_to_sh_mtu2(ced); + int disabled = 0; + + /* deal with old setting first */ + switch (ced->mode) { + case CLOCK_EVT_MODE_PERIODIC: + sh_mtu2_disable(p); + disabled = 1; + break; + default: + break; + } + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + pr_info("sh_mtu2: %s used for periodic clock events\n", + ced->name); + sh_mtu2_enable(p); + break; + case CLOCK_EVT_MODE_UNUSED: + if (!disabled) + sh_mtu2_disable(p); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + default: + break; + } +} + +static void sh_mtu2_register_clockevent(struct sh_mtu2_priv *p, + char *name, unsigned long rating) +{ + struct clock_event_device *ced = &p->ced; + int ret; + + memset(ced, 0, sizeof(*ced)); + + ced->name = name; + ced->features = CLOCK_EVT_FEAT_PERIODIC; + ced->rating = rating; + ced->cpumask = cpumask_of(0); + ced->set_mode = sh_mtu2_clock_event_mode; + + ret = setup_irq(p->irqaction.irq, &p->irqaction); + if (ret) { + pr_err("sh_mtu2: failed to request irq %d\n", + p->irqaction.irq); + return; + } + + pr_info("sh_mtu2: %s used for clock events\n", ced->name); + clockevents_register_device(ced); +} + +int sh_mtu2_register(struct sh_mtu2_priv *p, char *name, + unsigned long clockevent_rating) +{ + if (clockevent_rating) + sh_mtu2_register_clockevent(p, name, clockevent_rating); + + return 0; +} + +static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev) +{ + struct sh_mtu2_config *cfg = pdev->dev.platform_data; + struct resource *res; + int irq, ret; + ret = -ENXIO; + + memset(p, 0, sizeof(*p)); + p->pdev = pdev; + + if (!cfg) { + dev_err(&p->pdev->dev, "missing platform data\n"); + goto err0; + } + + platform_set_drvdata(pdev, p); + + res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&p->pdev->dev, "failed to get I/O memory\n"); + goto err0; + } + + irq = platform_get_irq(p->pdev, 0); + if (irq < 0) { + dev_err(&p->pdev->dev, "failed to get irq\n"); + goto err0; + } + + /* map memory, let mapbase point to our channel */ + p->mapbase = ioremap_nocache(res->start, resource_size(res)); + if (p->mapbase == NULL) { + pr_err("sh_mtu2: failed to remap I/O memory\n"); + goto err0; + } + + /* setup data for setup_irq() (too early for request_irq()) */ + p->irqaction.name = cfg->name; + p->irqaction.handler = sh_mtu2_interrupt; + p->irqaction.dev_id = p; + p->irqaction.irq = irq; + p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL; + p->irqaction.mask = CPU_MASK_NONE; + + /* get hold of clock */ + p->clk = clk_get(&p->pdev->dev, cfg->clk); + if (IS_ERR(p->clk)) { + pr_err("sh_mtu2: cannot get clock \"%s\"\n", cfg->clk); + ret = PTR_ERR(p->clk); + goto err1; + } + + return sh_mtu2_register(p, cfg->name, cfg->clockevent_rating); + err1: + iounmap(p->mapbase); + err0: + return ret; +} + +static int __devinit sh_mtu2_probe(struct platform_device *pdev) +{ + struct sh_mtu2_priv *p = platform_get_drvdata(pdev); + struct sh_mtu2_config *cfg = pdev->dev.platform_data; + int ret; + + if (p) { + pr_info("sh_mtu2: %s kept as earlytimer\n", cfg->name); + return 0; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + return -ENOMEM; + } + + ret = sh_mtu2_setup(p, pdev); + if (ret) { + kfree(p); + platform_set_drvdata(pdev, NULL); + } + return ret; +} + +static int __devexit sh_mtu2_remove(struct platform_device *pdev) +{ + return -EBUSY; /* cannot unregister clockevent */ +} + +static struct platform_driver sh_mtu2_device_driver = { + .probe = sh_mtu2_probe, + .remove = __devexit_p(sh_mtu2_remove), + .driver = { + .name = "sh_mtu2", + } +}; + +static int __init sh_mtu2_init(void) +{ + return platform_driver_register(&sh_mtu2_device_driver); +} + +static void __exit sh_mtu2_exit(void) +{ + platform_driver_unregister(&sh_mtu2_device_driver); +} + +early_platform_init("earlytimer", &sh_mtu2_device_driver); +module_init(sh_mtu2_init); +module_exit(sh_mtu2_exit); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("SuperH MTU2 Timer Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h new file mode 100644 index 00000000000..a98876340ff --- /dev/null +++ b/include/linux/sh_mtu2.h @@ -0,0 +1,12 @@ +#ifndef __SH_MTU2_H__ +#define __SH_MTU2_H__ + +struct sh_mtu2_config { + char *name; + int channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; +}; + +#endif /* __SH_MTU2_H__ */ -- cgit v1.2.3-70-g09d2 From 9570ef20423b549757aa484ad388f9a7d5bdc4d9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 1 May 2009 06:51:00 +0000 Subject: clocksource: SuperH TMU Timer driver This patch adds a TMU driver for the SuperH architecture. The TMU driver is a platform driver with early platform support to allow using a TMU channel as clockevent or clocksource during system bootup or later. Clocksource or clockevent can be selected. Both periodic and oneshot clockevents are supported. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 12 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/sh_tmu.c | 461 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sh_tmu.h | 13 ++ 4 files changed, 487 insertions(+) create mode 100644 drivers/clocksource/sh_tmu.c create mode 100644 include/linux/sh_tmu.h (limited to 'include') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index bda3dc1b0c2..1f74737c4f2 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -116,6 +116,9 @@ config SYS_SUPPORTS_CMT config SYS_SUPPORTS_MTU2 bool +config SYS_SUPPORTS_TMU + bool + config STACKTRACE_SUPPORT def_bool y @@ -470,6 +473,15 @@ config SH_TMU help This enables the use of the TMU as the system timer. +config SH_TIMER_TMU + bool "TMU timer driver" + depends on !SH_TMU && SYS_SUPPORTS_TMU + default y + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + help + This enables the build of the TMU timer driver. + config SH_TIMER_CMT bool "CMT timer driver" depends on SYS_SUPPORTS_CMT diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 9785586dc8c..eef216f7f61 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o +obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c new file mode 100644 index 00000000000..21bd77aa6a3 --- /dev/null +++ b/drivers/clocksource/sh_tmu.c @@ -0,0 +1,461 @@ +/* + * SuperH Timer Support - TMU + * + * Copyright (C) 2009 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct sh_tmu_priv { + void __iomem *mapbase; + struct clk *clk; + struct irqaction irqaction; + struct platform_device *pdev; + unsigned long rate; + unsigned long periodic; + struct clock_event_device ced; + struct clocksource cs; +}; + +static DEFINE_SPINLOCK(sh_tmu_lock); + +#define TSTR -1 /* shared register */ +#define TCOR 0 /* channel register */ +#define TCNT 1 /* channel register */ +#define TCR 2 /* channel register */ + +static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr) +{ + struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs; + + if (reg_nr == TSTR) + return ioread8(base - cfg->channel_offset); + + offs = reg_nr << 2; + + if (reg_nr == TCR) + return ioread16(base + offs); + else + return ioread32(base + offs); +} + +static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr, + unsigned long value) +{ + struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs; + + if (reg_nr == TSTR) { + iowrite8(value, base - cfg->channel_offset); + return; + } + + offs = reg_nr << 2; + + if (reg_nr == TCR) + iowrite16(value, base + offs); + else + iowrite32(value, base + offs); +} + +static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) +{ + struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + unsigned long flags, value; + + /* start stop register shared by multiple timer channels */ + spin_lock_irqsave(&sh_tmu_lock, flags); + value = sh_tmu_read(p, TSTR); + + if (start) + value |= 1 << cfg->timer_bit; + else + value &= ~(1 << cfg->timer_bit); + + sh_tmu_write(p, TSTR, value); + spin_unlock_irqrestore(&sh_tmu_lock, flags); +} + +static int sh_tmu_enable(struct sh_tmu_priv *p) +{ + struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + int ret; + + /* enable clock */ + ret = clk_enable(p->clk); + if (ret) { + pr_err("sh_tmu: cannot enable clock \"%s\"\n", cfg->clk); + return ret; + } + + /* make sure channel is disabled */ + sh_tmu_start_stop_ch(p, 0); + + /* maximum timeout */ + sh_tmu_write(p, TCOR, 0xffffffff); + sh_tmu_write(p, TCNT, 0xffffffff); + + /* configure channel to parent clock / 4, irq off */ + p->rate = clk_get_rate(p->clk) / 4; + sh_tmu_write(p, TCR, 0x0000); + + /* enable channel */ + sh_tmu_start_stop_ch(p, 1); + + return 0; +} + +static void sh_tmu_disable(struct sh_tmu_priv *p) +{ + /* disable channel */ + sh_tmu_start_stop_ch(p, 0); + + /* stop clock */ + clk_disable(p->clk); +} + +static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta, + int periodic) +{ + /* stop timer */ + sh_tmu_start_stop_ch(p, 0); + + /* acknowledge interrupt */ + sh_tmu_read(p, TCR); + + /* enable interrupt */ + sh_tmu_write(p, TCR, 0x0020); + + /* reload delta value in case of periodic timer */ + if (periodic) + sh_tmu_write(p, TCOR, delta); + else + sh_tmu_write(p, TCOR, 0); + + sh_tmu_write(p, TCNT, delta); + + /* start timer */ + sh_tmu_start_stop_ch(p, 1); +} + +static irqreturn_t sh_tmu_interrupt(int irq, void *dev_id) +{ + struct sh_tmu_priv *p = dev_id; + + /* disable or acknowledge interrupt */ + if (p->ced.mode == CLOCK_EVT_MODE_ONESHOT) + sh_tmu_write(p, TCR, 0x0000); + else + sh_tmu_write(p, TCR, 0x0020); + + /* notify clockevent layer */ + p->ced.event_handler(&p->ced); + return IRQ_HANDLED; +} + +static struct sh_tmu_priv *cs_to_sh_tmu(struct clocksource *cs) +{ + return container_of(cs, struct sh_tmu_priv, cs); +} + +static cycle_t sh_tmu_clocksource_read(struct clocksource *cs) +{ + struct sh_tmu_priv *p = cs_to_sh_tmu(cs); + + return sh_tmu_read(p, TCNT) ^ 0xffffffff; +} + +static int sh_tmu_clocksource_enable(struct clocksource *cs) +{ + struct sh_tmu_priv *p = cs_to_sh_tmu(cs); + int ret; + + ret = sh_tmu_enable(p); + if (ret) + return ret; + + /* TODO: calculate good shift from rate and counter bit width */ + cs->shift = 10; + cs->mult = clocksource_hz2mult(p->rate, cs->shift); + return 0; +} + +static void sh_tmu_clocksource_disable(struct clocksource *cs) +{ + sh_tmu_disable(cs_to_sh_tmu(cs)); +} + +static int sh_tmu_register_clocksource(struct sh_tmu_priv *p, + char *name, unsigned long rating) +{ + struct clocksource *cs = &p->cs; + + cs->name = name; + cs->rating = rating; + cs->read = sh_tmu_clocksource_read; + cs->enable = sh_tmu_clocksource_enable; + cs->disable = sh_tmu_clocksource_disable; + cs->mask = CLOCKSOURCE_MASK(32); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; + pr_info("sh_tmu: %s used as clock source\n", cs->name); + clocksource_register(cs); + return 0; +} + +static struct sh_tmu_priv *ced_to_sh_tmu(struct clock_event_device *ced) +{ + return container_of(ced, struct sh_tmu_priv, ced); +} + +static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic) +{ + struct clock_event_device *ced = &p->ced; + + sh_tmu_enable(p); + + /* TODO: calculate good shift from rate and counter bit width */ + + ced->shift = 32; + ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift); + ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced); + ced->min_delta_ns = 5000; + + if (periodic) { + p->periodic = (p->rate + HZ/2) / HZ; + sh_tmu_set_next(p, p->periodic, 1); + } +} + +static void sh_tmu_clock_event_mode(enum clock_event_mode mode, + struct clock_event_device *ced) +{ + struct sh_tmu_priv *p = ced_to_sh_tmu(ced); + int disabled = 0; + + /* deal with old setting first */ + switch (ced->mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + sh_tmu_disable(p); + disabled = 1; + break; + default: + break; + } + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + pr_info("sh_tmu: %s used for periodic clock events\n", + ced->name); + sh_tmu_clock_event_start(p, 1); + break; + case CLOCK_EVT_MODE_ONESHOT: + pr_info("sh_tmu: %s used for oneshot clock events\n", + ced->name); + sh_tmu_clock_event_start(p, 0); + break; + case CLOCK_EVT_MODE_UNUSED: + if (!disabled) + sh_tmu_disable(p); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + default: + break; + } +} + +static int sh_tmu_clock_event_next(unsigned long delta, + struct clock_event_device *ced) +{ + struct sh_tmu_priv *p = ced_to_sh_tmu(ced); + + BUG_ON(ced->mode != CLOCK_EVT_MODE_ONESHOT); + + /* program new delta value */ + sh_tmu_set_next(p, delta, 0); + return 0; +} + +static void sh_tmu_register_clockevent(struct sh_tmu_priv *p, + char *name, unsigned long rating) +{ + struct clock_event_device *ced = &p->ced; + int ret; + + memset(ced, 0, sizeof(*ced)); + + ced->name = name; + ced->features = CLOCK_EVT_FEAT_PERIODIC; + ced->features |= CLOCK_EVT_FEAT_ONESHOT; + ced->rating = rating; + ced->cpumask = cpumask_of(0); + ced->set_next_event = sh_tmu_clock_event_next; + ced->set_mode = sh_tmu_clock_event_mode; + + ret = setup_irq(p->irqaction.irq, &p->irqaction); + if (ret) { + pr_err("sh_tmu: failed to request irq %d\n", + p->irqaction.irq); + return; + } + + pr_info("sh_tmu: %s used for clock events\n", ced->name); + clockevents_register_device(ced); +} + +static int sh_tmu_register(struct sh_tmu_priv *p, char *name, + unsigned long clockevent_rating, + unsigned long clocksource_rating) +{ + if (clockevent_rating) + sh_tmu_register_clockevent(p, name, clockevent_rating); + else if (clocksource_rating) + sh_tmu_register_clocksource(p, name, clocksource_rating); + + return 0; +} + +static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev) +{ + struct sh_tmu_config *cfg = pdev->dev.platform_data; + struct resource *res; + int irq, ret; + ret = -ENXIO; + + memset(p, 0, sizeof(*p)); + p->pdev = pdev; + + if (!cfg) { + dev_err(&p->pdev->dev, "missing platform data\n"); + goto err0; + } + + platform_set_drvdata(pdev, p); + + res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&p->pdev->dev, "failed to get I/O memory\n"); + goto err0; + } + + irq = platform_get_irq(p->pdev, 0); + if (irq < 0) { + dev_err(&p->pdev->dev, "failed to get irq\n"); + goto err0; + } + + /* map memory, let mapbase point to our channel */ + p->mapbase = ioremap_nocache(res->start, resource_size(res)); + if (p->mapbase == NULL) { + pr_err("sh_tmu: failed to remap I/O memory\n"); + goto err0; + } + + /* setup data for setup_irq() (too early for request_irq()) */ + p->irqaction.name = cfg->name; + p->irqaction.handler = sh_tmu_interrupt; + p->irqaction.dev_id = p; + p->irqaction.irq = irq; + p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL; + p->irqaction.mask = CPU_MASK_NONE; + + /* get hold of clock */ + p->clk = clk_get(&p->pdev->dev, cfg->clk); + if (IS_ERR(p->clk)) { + pr_err("sh_tmu: cannot get clock \"%s\"\n", cfg->clk); + ret = PTR_ERR(p->clk); + goto err1; + } + + return sh_tmu_register(p, cfg->name, + cfg->clockevent_rating, + cfg->clocksource_rating); + err1: + iounmap(p->mapbase); + err0: + return ret; +} + +static int __devinit sh_tmu_probe(struct platform_device *pdev) +{ + struct sh_tmu_priv *p = platform_get_drvdata(pdev); + struct sh_tmu_config *cfg = pdev->dev.platform_data; + int ret; + + if (p) { + pr_info("sh_tmu: %s kept as earlytimer\n", cfg->name); + return 0; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + return -ENOMEM; + } + + ret = sh_tmu_setup(p, pdev); + if (ret) { + kfree(p); + platform_set_drvdata(pdev, NULL); + } + return ret; +} + +static int __devexit sh_tmu_remove(struct platform_device *pdev) +{ + return -EBUSY; /* cannot unregister clockevent and clocksource */ +} + +static struct platform_driver sh_tmu_device_driver = { + .probe = sh_tmu_probe, + .remove = __devexit_p(sh_tmu_remove), + .driver = { + .name = "sh_tmu", + } +}; + +static int __init sh_tmu_init(void) +{ + return platform_driver_register(&sh_tmu_device_driver); +} + +static void __exit sh_tmu_exit(void) +{ + platform_driver_unregister(&sh_tmu_device_driver); +} + +early_platform_init("earlytimer", &sh_tmu_device_driver); +module_init(sh_tmu_init); +module_exit(sh_tmu_exit); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("SuperH TMU Timer Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h new file mode 100644 index 00000000000..b1195e8a37d --- /dev/null +++ b/include/linux/sh_tmu.h @@ -0,0 +1,13 @@ +#ifndef __SH_TMU_H__ +#define __SH_TMU_H__ + +struct sh_tmu_config { + char *name; + unsigned long channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; + unsigned long clocksource_rating; +}; + +#endif /* __SH_TMU_H__ */ -- cgit v1.2.3-70-g09d2 From 46a12f7426d71cabc08972cf8d3ffdd441d26a3a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 3 May 2009 17:57:17 +0900 Subject: sh: Consolidate MTU2/CMT/TMU timer platform data. All of the SH timers use a roughly identical structure for platform data, which presently is broken out for each block. Consolidate all of these definitions, as there is no reason for them to be broken out in the first place. Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh2/setup-sh7619.c | 6 +++--- arch/sh/kernel/cpu/sh2a/setup-mxg.c | 8 ++++---- arch/sh/kernel/cpu/sh2a/setup-sh7201.c | 8 ++++---- arch/sh/kernel/cpu/sh2a/setup-sh7203.c | 11 +++++------ arch/sh/kernel/cpu/sh2a/setup-sh7206.c | 13 ++++++------- arch/sh/kernel/cpu/sh4a/setup-sh7343.c | 4 ++-- arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 4 ++-- arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 11 +++++------ arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 4 ++-- arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 4 ++-- drivers/clocksource/sh_cmt.c | 14 +++++++------- drivers/clocksource/sh_mtu2.c | 14 +++++++------- drivers/clocksource/sh_tmu.c | 14 +++++++------- include/linux/sh_cmt.h | 13 ------------- include/linux/sh_mtu2.h | 12 ------------ include/linux/sh_timer.h | 13 +++++++++++++ include/linux/sh_tmu.h | 13 ------------- 17 files changed, 69 insertions(+), 97 deletions(-) delete mode 100644 include/linux/sh_cmt.h delete mode 100644 include/linux/sh_mtu2.h create mode 100644 include/linux/sh_timer.h delete mode 100644 include/linux/sh_tmu.h (limited to 'include') diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c index d70c263eb07..94ac27fc223 100644 --- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { @@ -111,7 +111,7 @@ static struct platform_device eth_device = { .resource = eth_resources, }; -static struct sh_cmt_config cmt0_platform_data = { +static struct sh_timer_config cmt0_platform_data = { .name = "CMT0", .channel_offset = 0x02, .timer_bit = 0, @@ -143,7 +143,7 @@ static struct platform_device cmt0_device = { .num_resources = ARRAY_SIZE(cmt0_resources), }; -static struct sh_cmt_config cmt1_platform_data = { +static struct sh_timer_config cmt1_platform_data = { .name = "CMT1", .channel_offset = 0x08, .timer_bit = 1, diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c index 870030aa05b..a452d964906 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c +++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, @@ -114,7 +114,7 @@ static struct intc_mask_reg mask_registers[] __initdata = { static DECLARE_INTC_DESC(intc_desc, "mxg", vectors, groups, mask_registers, prio_registers, NULL); -static struct sh_mtu2_config mtu2_0_platform_data = { +static struct sh_timer_config mtu2_0_platform_data = { .name = "MTU2_0", .channel_offset = -0x80, .timer_bit = 0, @@ -145,7 +145,7 @@ static struct platform_device mtu2_0_device = { .num_resources = ARRAY_SIZE(mtu2_0_resources), }; -static struct sh_mtu2_config mtu2_1_platform_data = { +static struct sh_timer_config mtu2_1_platform_data = { .name = "MTU2_1", .channel_offset = -0x100, .timer_bit = 1, @@ -176,7 +176,7 @@ static struct platform_device mtu2_1_device = { .num_resources = ARRAY_SIZE(mtu2_1_resources), }; -static struct sh_mtu2_config mtu2_2_platform_data = { +static struct sh_timer_config mtu2_2_platform_data = { .name = "MTU2_2", .channel_offset = 0x80, .timer_bit = 2, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c index 074aa9062e4..772358b7685 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { @@ -251,7 +251,7 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; -static struct sh_mtu2_config mtu2_0_platform_data = { +static struct sh_timer_config mtu2_0_platform_data = { .name = "MTU2_0", .channel_offset = -0x80, .timer_bit = 0, @@ -282,7 +282,7 @@ static struct platform_device mtu2_0_device = { .num_resources = ARRAY_SIZE(mtu2_0_resources), }; -static struct sh_mtu2_config mtu2_1_platform_data = { +static struct sh_timer_config mtu2_1_platform_data = { .name = "MTU2_1", .channel_offset = -0x100, .timer_bit = 1, @@ -313,7 +313,7 @@ static struct platform_device mtu2_1_device = { .num_resources = ARRAY_SIZE(mtu2_1_resources), }; -static struct sh_mtu2_config mtu2_2_platform_data = { +static struct sh_timer_config mtu2_2_platform_data = { .name = "MTU2_2", .channel_offset = 0x80, .timer_bit = 2, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c index 3448164b573..d7493418ba6 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c @@ -11,8 +11,7 @@ #include #include #include -#include -#include +#include #include enum { @@ -208,7 +207,7 @@ static struct platform_device sci_device = { }, }; -static struct sh_cmt_config cmt0_platform_data = { +static struct sh_timer_config cmt0_platform_data = { .name = "CMT0", .channel_offset = 0x02, .timer_bit = 0, @@ -240,7 +239,7 @@ static struct platform_device cmt0_device = { .num_resources = ARRAY_SIZE(cmt0_resources), }; -static struct sh_cmt_config cmt1_platform_data = { +static struct sh_timer_config cmt1_platform_data = { .name = "CMT1", .channel_offset = 0x08, .timer_bit = 1, @@ -272,7 +271,7 @@ static struct platform_device cmt1_device = { .num_resources = ARRAY_SIZE(cmt1_resources), }; -static struct sh_mtu2_config mtu2_0_platform_data = { +static struct sh_timer_config mtu2_0_platform_data = { .name = "MTU2_0", .channel_offset = -0x80, .timer_bit = 0, @@ -303,7 +302,7 @@ static struct platform_device mtu2_0_device = { .num_resources = ARRAY_SIZE(mtu2_0_resources), }; -static struct sh_mtu2_config mtu2_1_platform_data = { +static struct sh_timer_config mtu2_1_platform_data = { .name = "MTU2_1", .channel_offset = -0x100, .timer_bit = 1, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c index e700559b6b8..2fc6bff5c5f 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include #include enum { @@ -168,7 +167,7 @@ static struct platform_device sci_device = { }, }; -static struct sh_cmt_config cmt0_platform_data = { +static struct sh_timer_config cmt0_platform_data = { .name = "CMT0", .channel_offset = 0x02, .timer_bit = 0, @@ -200,7 +199,7 @@ static struct platform_device cmt0_device = { .num_resources = ARRAY_SIZE(cmt0_resources), }; -static struct sh_cmt_config cmt1_platform_data = { +static struct sh_timer_config cmt1_platform_data = { .name = "CMT1", .channel_offset = 0x08, .timer_bit = 1, @@ -232,7 +231,7 @@ static struct platform_device cmt1_device = { .num_resources = ARRAY_SIZE(cmt1_resources), }; -static struct sh_mtu2_config mtu2_0_platform_data = { +static struct sh_timer_config mtu2_0_platform_data = { .name = "MTU2_0", .channel_offset = -0x80, .timer_bit = 0, @@ -263,7 +262,7 @@ static struct platform_device mtu2_0_device = { .num_resources = ARRAY_SIZE(mtu2_0_resources), }; -static struct sh_mtu2_config mtu2_1_platform_data = { +static struct sh_timer_config mtu2_1_platform_data = { .name = "MTU2_1", .channel_offset = -0x100, .timer_bit = 1, @@ -294,7 +293,7 @@ static struct platform_device mtu2_1_device = { .num_resources = ARRAY_SIZE(mtu2_1_resources), }; -static struct sh_mtu2_config mtu2_2_platform_data = { +static struct sh_timer_config mtu2_2_platform_data = { .name = "MTU2_2", .channel_offset = 0x80, .timer_bit = 2, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c index cb5b4db1ca2..f327b7eaf47 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include static struct resource iic0_resources[] = { @@ -141,7 +141,7 @@ static struct platform_device jpu_device = { .num_resources = ARRAY_SIZE(jpu_resources), }; -static struct sh_cmt_config cmt_platform_data = { +static struct sh_timer_config cmt_platform_data = { .name = "CMT", .channel_offset = 0x60, .timer_bit = 5, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index 2a771f48e9e..7361ea989b9 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include static struct resource iic_resources[] = { @@ -148,7 +148,7 @@ static struct platform_device veu1_device = { .num_resources = ARRAY_SIZE(veu1_resources), }; -static struct sh_cmt_config cmt_platform_data = { +static struct sh_timer_config cmt_platform_data = { .name = "CMT", .channel_offset = 0x60, .timer_bit = 5, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index 512735c5cc8..624e8e5a605 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -13,8 +13,7 @@ #include #include #include -#include -#include +#include #include #include @@ -178,7 +177,7 @@ static struct platform_device jpu_device = { .num_resources = ARRAY_SIZE(jpu_resources), }; -static struct sh_cmt_config cmt_platform_data = { +static struct sh_timer_config cmt_platform_data = { .name = "CMT", .channel_offset = 0x60, .timer_bit = 5, @@ -210,7 +209,7 @@ static struct platform_device cmt_device = { .num_resources = ARRAY_SIZE(cmt_resources), }; -static struct sh_tmu_config tmu0_platform_data = { +static struct sh_timer_config tmu0_platform_data = { .name = "TMU0", .channel_offset = 0x04, .timer_bit = 0, @@ -241,7 +240,7 @@ static struct platform_device tmu0_device = { .num_resources = ARRAY_SIZE(tmu0_resources), }; -static struct sh_tmu_config tmu1_platform_data = { +static struct sh_timer_config tmu1_platform_data = { .name = "TMU1", .channel_offset = 0x10, .timer_bit = 1, @@ -272,7 +271,7 @@ static struct platform_device tmu1_device = { .num_resources = ARRAY_SIZE(tmu1_resources), }; -static struct sh_tmu_config tmu2_platform_data = { +static struct sh_timer_config tmu2_platform_data = { .name = "TMU2", .channel_offset = 0x1c, .timer_bit = 2, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index dbb44949ed1..fb9b5427a06 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -101,7 +101,7 @@ static struct platform_device veu1_device = { .num_resources = ARRAY_SIZE(veu1_resources), }; -static struct sh_cmt_config cmt_platform_data = { +static struct sh_timer_config cmt_platform_data = { .name = "CMT", .channel_offset = 0x60, .timer_bit = 5, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 8429396acb5..e074951b88b 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -230,7 +230,7 @@ static struct platform_device veu1_device = { .num_resources = ARRAY_SIZE(veu1_resources), }; -static struct sh_cmt_config cmt_platform_data = { +static struct sh_timer_config cmt_platform_data = { .name = "CMT", .channel_offset = 0x60, .timer_bit = 5, diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 4ff1508e5ab..aeb8c9b27b5 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include struct sh_cmt_priv { void __iomem *mapbase; @@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(sh_cmt_lock); static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr) { - struct sh_cmt_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -83,7 +83,7 @@ static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr) static inline void sh_cmt_write(struct sh_cmt_priv *p, int reg_nr, unsigned long value) { - struct sh_cmt_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -131,7 +131,7 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_priv *p, static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) { - struct sh_cmt_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; unsigned long flags, value; /* start stop register shared by multiple timer channels */ @@ -149,7 +149,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { - struct sh_cmt_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; int ret; /* enable clock */ @@ -560,7 +560,7 @@ int sh_cmt_register(struct sh_cmt_priv *p, char *name, static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) { - struct sh_cmt_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; struct resource *res; int irq, ret; ret = -ENXIO; @@ -638,7 +638,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) static int __devinit sh_cmt_probe(struct platform_device *pdev) { struct sh_cmt_priv *p = platform_get_drvdata(pdev); - struct sh_cmt_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; int ret; if (p) { diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index 420566f4c50..ef02185a827 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include struct sh_mtu2_priv { void __iomem *mapbase; @@ -63,7 +63,7 @@ static unsigned long mtu2_reg_offs[] = { static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr) { - struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -81,7 +81,7 @@ static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr) static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr, unsigned long value) { - struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -100,7 +100,7 @@ static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr, static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) { - struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; unsigned long flags, value; /* start stop register shared by multiple timer channels */ @@ -118,7 +118,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) static int sh_mtu2_enable(struct sh_mtu2_priv *p) { - struct sh_mtu2_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; int ret; /* enable clock */ @@ -243,7 +243,7 @@ int sh_mtu2_register(struct sh_mtu2_priv *p, char *name, static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev) { - struct sh_mtu2_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; struct resource *res; int irq, ret; ret = -ENXIO; @@ -303,7 +303,7 @@ static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev) static int __devinit sh_mtu2_probe(struct platform_device *pdev) { struct sh_mtu2_priv *p = platform_get_drvdata(pdev); - struct sh_mtu2_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; int ret; if (p) { diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 21bd77aa6a3..d6ea4398bf6 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include struct sh_tmu_priv { void __iomem *mapbase; @@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(sh_tmu_lock); static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr) { - struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -69,7 +69,7 @@ static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr) static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr, unsigned long value) { - struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; void __iomem *base = p->mapbase; unsigned long offs; @@ -88,7 +88,7 @@ static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr, static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) { - struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; unsigned long flags, value; /* start stop register shared by multiple timer channels */ @@ -106,7 +106,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) static int sh_tmu_enable(struct sh_tmu_priv *p) { - struct sh_tmu_config *cfg = p->pdev->dev.platform_data; + struct sh_timer_config *cfg = p->pdev->dev.platform_data; int ret; /* enable clock */ @@ -345,7 +345,7 @@ static int sh_tmu_register(struct sh_tmu_priv *p, char *name, static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev) { - struct sh_tmu_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; struct resource *res; int irq, ret; ret = -ENXIO; @@ -407,7 +407,7 @@ static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev) static int __devinit sh_tmu_probe(struct platform_device *pdev) { struct sh_tmu_priv *p = platform_get_drvdata(pdev); - struct sh_tmu_config *cfg = pdev->dev.platform_data; + struct sh_timer_config *cfg = pdev->dev.platform_data; int ret; if (p) { diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h deleted file mode 100644 index 68cacde5954..00000000000 --- a/include/linux/sh_cmt.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SH_CMT_H__ -#define __SH_CMT_H__ - -struct sh_cmt_config { - char *name; - unsigned long channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; - unsigned long clocksource_rating; -}; - -#endif /* __SH_CMT_H__ */ diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h deleted file mode 100644 index a98876340ff..00000000000 --- a/include/linux/sh_mtu2.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __SH_MTU2_H__ -#define __SH_MTU2_H__ - -struct sh_mtu2_config { - char *name; - int channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; -}; - -#endif /* __SH_MTU2_H__ */ diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h new file mode 100644 index 00000000000..864bd56bd3b --- /dev/null +++ b/include/linux/sh_timer.h @@ -0,0 +1,13 @@ +#ifndef __SH_TIMER_H__ +#define __SH_TIMER_H__ + +struct sh_timer_config { + char *name; + long channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; + unsigned long clocksource_rating; +}; + +#endif /* __SH_TIMER_H__ */ diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h deleted file mode 100644 index b1195e8a37d..00000000000 --- a/include/linux/sh_tmu.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SH_TMU_H__ -#define __SH_TMU_H__ - -struct sh_tmu_config { - char *name; - unsigned long channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; - unsigned long clocksource_rating; -}; - -#endif /* __SH_TMU_H__ */ -- cgit v1.2.3-70-g09d2 From accc5b4f902b0ba83b2c6c48f2d9e7c204cef4a8 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sun, 3 May 2009 08:58:48 +0000 Subject: ipv4: remove unused macro (FIB_RES_RESET) from ip_fib.h. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 34855eede5f..ef91fe924ba 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -124,14 +124,12 @@ struct fib_result_nl { #ifdef CONFIG_IP_ROUTE_MULTIPATH #define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) -#define FIB_RES_RESET(res) ((res).nh_sel = 0) #define FIB_TABLE_HASHSZ 2 #else /* CONFIG_IP_ROUTE_MULTIPATH */ #define FIB_RES_NH(res) ((res).fi->fib_nh[0]) -#define FIB_RES_RESET(res) #define FIB_TABLE_HASHSZ 256 -- cgit v1.2.3-70-g09d2 From 0d905bca23aca5c86a10ee101bcd3b1abbd40b25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2009 19:13:30 +0200 Subject: perf_counter: initialize the per-cpu context earlier percpu scheduling for perfcounters wants to take the context lock, but that lock first needs to be initialized. Currently it is an early_initcall() - but that is too late, the task tick runs much sooner than that. Call it explicitly from the scheduler init sequence instead. [ Impact: fix access-before-init crash ] LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 ++++- kernel/perf_counter.c | 5 +---- kernel/sched.c | 5 ++++- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f776851f8c4..a356fa69796 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -573,6 +573,8 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; +extern void perf_counter_init(void); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -600,9 +602,10 @@ perf_counter_mmap(unsigned long addr, unsigned long len, static inline void perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } + unsigned long pgoff, struct file *file) { } static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_init(void) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b9679c36bcc..fcdafa234a5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3265,15 +3265,12 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = { .notifier_call = perf_cpu_notify, }; -static int __init perf_counter_init(void) +void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); - - return 0; } -early_initcall(perf_counter_init); static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) { diff --git a/kernel/sched.c b/kernel/sched.c index 2f600e30dcf..a728976a3a6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -8996,7 +8997,7 @@ void __init sched_init(void) * 1024) and two child groups A0 and A1 (of weight 1024 each), * then A0's share of the cpu resource is: * - * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% + * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% * * We achieve this by letting init_task_group's tasks sit * directly in rq->cfs (i.e init_task_group->se[] = NULL). @@ -9097,6 +9098,8 @@ void __init sched_init(void) alloc_bootmem_cpumask_var(&cpu_isolated_map); #endif /* SMP */ + perf_counter_init(); + scheduler_running = 1; } -- cgit v1.2.3-70-g09d2 From bbd993077d788589a86a718ba7a7895ba5e71a17 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 May 2009 10:27:38 +0100 Subject: ASoC: Remove redundant codec pointer from DAIs The DAI structure has two pointers to the codec, one in the body of the DAI and one in a union for a parent pointer. Drop the parent pointer version. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index a997c2cac63..496dc30457b 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -205,11 +205,8 @@ struct snd_soc_dai { /* DAI private data */ void *private_data; - /* parent codec/platform */ - union { - struct snd_soc_codec *codec; - struct snd_soc_platform *platform; - }; + /* parent platform */ + struct snd_soc_platform *platform; struct list_head list; }; -- cgit v1.2.3-70-g09d2 From 4bbe1ddf89a5ba3ec30fe5980912d8bda3a3cbb2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Oct 2008 03:07:14 +0200 Subject: ALSA: Add extra delay count in PCM Added runtime->delay field to adjust the delayed samples for snd_pcm_delay(). Typically a hardware FIFO length is stored in this field, so that the extra delay between hwptr and applptr can be computed. Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + sound/core/pcm_native.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c1729689161..267effddb07 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -270,6 +270,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ + snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ /* -- HW params -- */ snd_pcm_access_t access; /* access mode */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index fc6f98e257d..cb769d415db 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -587,14 +587,15 @@ int snd_pcm_status(struct snd_pcm_substream *substream, if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { status->avail = snd_pcm_playback_avail(runtime); if (runtime->status->state == SNDRV_PCM_STATE_RUNNING || - runtime->status->state == SNDRV_PCM_STATE_DRAINING) + runtime->status->state == SNDRV_PCM_STATE_DRAINING) { status->delay = runtime->buffer_size - status->avail; - else + status->delay += runtime->delay; + } else status->delay = 0; } else { status->avail = snd_pcm_capture_avail(runtime); if (runtime->status->state == SNDRV_PCM_STATE_RUNNING) - status->delay = status->avail; + status->delay = status->avail + runtime->delay; else status->delay = 0; } @@ -2404,6 +2405,7 @@ static int snd_pcm_delay(struct snd_pcm_substream *substream, n = snd_pcm_playback_hw_avail(runtime); else n = snd_pcm_capture_avail(runtime); + n += runtime->delay; break; case SNDRV_PCM_STATE_XRUN: err = -EPIPE; -- cgit v1.2.3-70-g09d2 From f0d2c681ac0a85142fc8abe65fc33fcad35cb9b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 13:43:37 -0400 Subject: ring-buffer: add counters for commit overrun and nmi dropped entries The WARN_ON in the ring buffer when a commit is preempted and the buffer is filled by preceding writes can happen in normal operations. The WARN_ON makes it look like a bug, not to mention, because it does not stop tracing and calls printk which can also recurse, this is prone to deadlock (the WARN_ON is not in a position to recurse). This patch removes the WARN_ON and replaces it with a counter that can be retrieved by a tracer. This counter is called commit_overrun. While at it, I added a nmi_dropped counter to count any time an NMI entry is dropped because the NMI could not take the spinlock. [ Impact: prevent deadlock by printing normal case warning ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 ++ kernel/trace/ring_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1c2f80911fb..f1345828c7c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -153,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3e86da9b2a0..26e1359fe19 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -402,6 +402,8 @@ struct ring_buffer_per_cpu { struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; + unsigned long nmi_dropped; + unsigned long commit_overrun; unsigned long overrun; unsigned long entries; u64 write_stamp; @@ -1216,8 +1218,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * simply fail. */ if (unlikely(in_nmi())) { - if (!__raw_spin_trylock(&cpu_buffer->lock)) + if (!__raw_spin_trylock(&cpu_buffer->lock)) { + cpu_buffer->nmi_dropped++; goto out_reset; + } } else __raw_spin_lock(&cpu_buffer->lock); @@ -1238,8 +1242,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * about it. */ if (unlikely(next_page == commit_page)) { - /* This can easily happen on small ring buffers */ - WARN_ON_ONCE(buffer->pages > 2); + cpu_buffer->commit_overrun++; goto out_reset; } @@ -1925,6 +1928,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); +/** + * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = cpu_buffer->nmi_dropped; + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); + +/** + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long +ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = cpu_buffer->commit_overrun; + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); + /** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer @@ -2595,6 +2639,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; + cpu_buffer->nmi_dropped = 0; + cpu_buffer->commit_overrun = 0; cpu_buffer->overrun = 0; cpu_buffer->entries = 0; -- cgit v1.2.3-70-g09d2 From c66de4a5be7913247bd83d79168f8e4420c9cfbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:22 +0200 Subject: perf_counter: uncouple data_head updates from wakeups Keep data_head up-to-date irrespective of notifications. This fixes the case where you disable a counter and don't get a notification for the last few pending events, and it also allows polling usage. [ Impact: increase precision of perfcounter mmap-ed fields ] Suggested-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090505155436.925084300@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- kernel/perf_counter.c | 20 +++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a356fa69796..17b63105f2a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -362,9 +362,11 @@ struct perf_mmap_data { atomic_t head; /* write position */ atomic_t events; /* event limit */ - atomic_t wakeup_head; /* completed head */ + atomic_t done_head; /* completed head */ atomic_t lock; /* concurrent writes */ + atomic_t wakeup; /* needs a wakeup */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 5f86a1156c9..ba5e921e1f3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1696,7 +1696,6 @@ struct perf_output_handle { struct perf_mmap_data *data; unsigned int offset; unsigned int head; - int wakeup; int nmi; int overflow; int locked; @@ -1752,8 +1751,7 @@ static void perf_output_unlock(struct perf_output_handle *handle) struct perf_mmap_data *data = handle->data; int head, cpu; - if (handle->wakeup) - data->wakeup_head = data->head; + data->done_head = data->head; if (!handle->locked) goto out; @@ -1764,13 +1762,11 @@ again: * before we publish the new head, matched by a rmb() in userspace when * reading this position. */ - while ((head = atomic_xchg(&data->wakeup_head, 0))) { + while ((head = atomic_xchg(&data->done_head, 0))) data->user_page->data_head = head; - handle->wakeup = 1; - } /* - * NMI can happen here, which means we can miss a wakeup_head update. + * NMI can happen here, which means we can miss a done_head update. */ cpu = atomic_xchg(&data->lock, 0); @@ -1779,7 +1775,7 @@ again: /* * Therefore we have to validate we did not indeed do so. */ - if (unlikely(atomic_read(&data->wakeup_head))) { + if (unlikely(atomic_read(&data->done_head))) { /* * Since we had it locked, we can lock it again. */ @@ -1789,7 +1785,7 @@ again: goto again; } - if (handle->wakeup) + if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); out: local_irq_restore(handle->flags); @@ -1824,7 +1820,9 @@ static int perf_output_begin(struct perf_output_handle *handle, handle->offset = offset; handle->head = head; - handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); + + if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) + atomic_set(&data->wakeup, 1); return 0; @@ -1882,7 +1880,7 @@ static void perf_output_end(struct perf_output_handle *handle) int events = atomic_inc_return(&data->events); if (events >= wakeup_events) { atomic_sub(wakeup_events, &data->events); - handle->wakeup = 1; + atomic_set(&data->wakeup, 1); } } -- cgit v1.2.3-70-g09d2 From 6de6a7b95705b859b61430fa3afa1403034eb3e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:23 +0200 Subject: perf_counter: add ioctl(PERF_COUNTER_IOC_RESET) Provide a way to reset an existing counter - this eases PAPI libraries around perfcounters. Similar to read() it doesn't collapse pending child counters. [ Impact: new perfcounter fd ioctl method to reset counters ] Suggested-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090505155437.022272933@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 17b63105f2a..0fcbf34a4f7 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -160,6 +160,7 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) +#define PERF_COUNTER_IOC_RESET _IO ('$', 3) /* * Structure of the page that can be mapped via mmap diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ba5e921e1f3..6e6834e0587 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1288,6 +1288,11 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) return events; } +static void perf_counter_reset(struct perf_counter *counter) +{ + atomic_set(&counter->count, 0); +} + static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -1303,6 +1308,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_REFRESH: perf_counter_refresh(counter, arg); break; + case PERF_COUNTER_IOC_RESET: + perf_counter_reset(counter); + break; default: err = -ENOTTY; } -- cgit v1.2.3-70-g09d2 From c5078f78b455fbf67ea71442c7e7ca8acf9ff095 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 May 2009 17:50:24 +0200 Subject: perf_counter: provide an mlock threshold Provide a threshold to relax the mlock accounting, increasing usability. Each counter gets perf_counter_mlock_kb for free. [ Impact: allow more mmap buffering ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090505155437.112113632@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 15 +++++++++++---- kernel/sysctl.c | 8 ++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0fcbf34a4f7..00081d84169 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -358,6 +358,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ + int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ atomic_t head; /* write position */ @@ -575,6 +576,7 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; +extern int sysctl_perf_counter_mlock; extern void perf_counter_init(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 6e6834e0587..2d134273830 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -44,6 +44,7 @@ static atomic_t nr_munmap_tracking __read_mostly; static atomic_t nr_comm_tracking __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ +int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */ /* * Lock for (sysadmin-configurable) counter reservations: @@ -1461,7 +1462,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { - vma->vm_mm->locked_vm -= counter->data->nr_pages + 1; + vma->vm_mm->locked_vm -= counter->data->nr_locked; perf_mmap_data_free(counter); mutex_unlock(&counter->mmap_mutex); } @@ -1480,6 +1481,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) unsigned long nr_pages; unsigned long locked, lock_limit; int ret = 0; + long extra; if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) return -EINVAL; @@ -1507,8 +1509,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; } - locked = vma->vm_mm->locked_vm; - locked += nr_pages + 1; + extra = nr_pages /* + 1 only account the data pages */; + extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); + if (extra < 0) + extra = 0; + + locked = vma->vm_mm->locked_vm + extra; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -1524,7 +1530,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; atomic_set(&counter->mmap_count, 1); - vma->vm_mm->locked_vm += nr_pages + 1; + vma->vm_mm->locked_vm += extra; + counter->data->nr_locked = extra; unlock: mutex_unlock(&counter->mmap_mutex); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8203d70928d..3b05c2b088d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -920,6 +920,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "perf_counter_mlock_kb", + .data = &sysctl_perf_counter_mlock, + .maxlen = sizeof(sysctl_perf_counter_mlock), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif /* * NOTE: do not add new entries to this table unless you have read -- cgit v1.2.3-70-g09d2 From f001fde5eadd915f4858d22ed70d7040f48767cf Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 May 2009 02:48:28 +0000 Subject: net: introduce a list of device addresses dev_addr_list (v6) v5 -> v6 (current): -removed so far unused static functions -corrected dev_addr_del_multiple to call del instead of add v4 -> v5: -added device address type (suggested by davem) -removed refcounting (better to have simplier code then safe potentially few bytes) v3 -> v4: -changed kzalloc to kmalloc in __hw_addr_add_ii() -ASSERT_RTNL() avoided in dev_addr_flush() and dev_addr_init() v2 -> v3: -removed unnecessary rcu read locking -moved dev_addr_flush() calling to ensure no null dereference of dev_addr v1 -> v2: -added forgotten ASSERT_RTNL to dev_addr_init and dev_addr_flush -removed unnecessary rcu_read locking in dev_addr_init -use compare_ether_addr_64bits instead of compare_ether_addr -use L1_CACHE_BYTES as size for allocating struct netdev_hw_addr -use call_rcu instead of rcu_synchronize -moved is_etherdev_addr into __KERNEL__ ifdef This patch introduces a new list in struct net_device and brings a set of functions to handle the work with device address list. The list is a replacement for the original dev_addr field and because in some situations there is need to carry several device addresses with the net device. To be backward compatible, dev_addr is made to point to the first member of the list so original drivers sees no difference. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 27 +++++ include/linux/netdevice.h | 37 ++++++- net/core/dev.c | 250 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 312 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index a1f17abba7d..3d7a6687d24 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -182,6 +182,33 @@ static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2], return compare_ether_addr(addr1, addr2); #endif } + +/** + * is_etherdev_addr - Tell if given Ethernet address belongs to the device. + * @dev: Pointer to a device structure + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Compare passed address with all addresses of the device. Return true if the + * address if one of the device addresses. + * + * Note that this function calls compare_ether_addr_64bits() so take care of + * the right padding. + */ +static inline bool is_etherdev_addr(const struct net_device *dev, + const u8 addr[6 + 2]) +{ + struct netdev_hw_addr *ha; + int res = 1; + + rcu_read_lock(); + for_each_dev_addr(dev, ha) { + res = compare_ether_addr_64bits(addr, ha->addr); + if (!res) + break; + } + rcu_read_unlock(); + return !res; +} #endif /* __KERNEL__ */ /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ff42aba403c..02882e2ebd4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -210,6 +210,16 @@ struct dev_addr_list #define dmi_users da_users #define dmi_gusers da_gusers +struct netdev_hw_addr { + struct list_head list; + unsigned char addr[MAX_ADDR_LEN]; + unsigned char type; +#define NETDEV_HW_ADDR_T_LAN 1 +#define NETDEV_HW_ADDR_T_SAN 2 +#define NETDEV_HW_ADDR_T_SLAVE 3 + struct rcu_head rcu_head; +}; + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ @@ -784,8 +794,11 @@ struct net_device */ unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + unsigned char *dev_addr; /* hw address, (before bcast + because most packets are + unicast) */ + + struct list_head dev_addr_list; /* list of device hw addresses */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -1791,6 +1804,13 @@ static inline void netif_addr_unlock_bh(struct net_device *dev) spin_unlock_bh(&dev->addr_list_lock); } +/* + * dev_addr_list walker. Should be used only for read access. Call with + * rcu_read_lock held. + */ +#define for_each_dev_addr(dev, ha) \ + list_for_each_entry_rcu(ha, &dev->dev_addr_list, list) + /* These functions live elsewhere (drivers/net/net_init.c, but related) */ extern void ether_setup(struct net_device *dev); @@ -1803,6 +1823,19 @@ extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, alloc_netdev_mq(sizeof_priv, name, setup, 1) extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); + +/* Functions used for device addresses handling */ +extern int dev_addr_add(struct net_device *dev, unsigned char *addr, + unsigned char addr_type); +extern int dev_addr_del(struct net_device *dev, unsigned char *addr, + unsigned char addr_type); +extern int dev_addr_add_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type); +extern int dev_addr_del_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type); + /* Functions used for secondary unicast and multicast support */ extern void dev_set_rx_mode(struct net_device *dev); extern void __dev_set_rx_mode(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 3c8073fe970..637ea71b0a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3434,6 +3434,252 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } +/* hw addresses list handling functions */ + +static int __hw_addr_add(struct list_head *list, unsigned char *addr, + int addr_len, unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + int alloc_size; + + if (addr_len > MAX_ADDR_LEN) + return -EINVAL; + + alloc_size = sizeof(*ha); + if (alloc_size < L1_CACHE_BYTES) + alloc_size = L1_CACHE_BYTES; + ha = kmalloc(alloc_size, GFP_ATOMIC); + if (!ha) + return -ENOMEM; + memcpy(ha->addr, addr, addr_len); + ha->type = addr_type; + list_add_tail_rcu(&ha->list, list); + return 0; +} + +static void ha_rcu_free(struct rcu_head *head) +{ + struct netdev_hw_addr *ha; + + ha = container_of(head, struct netdev_hw_addr, rcu_head); + kfree(ha); +} + +static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr, + int addr_len, unsigned char addr_type, + int ignore_index) +{ + struct netdev_hw_addr *ha; + int i = 0; + + list_for_each_entry(ha, list, list) { + if (i++ != ignore_index && + !memcmp(ha->addr, addr, addr_len) && + (ha->type == addr_type || !addr_type)) { + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + return 0; + } + } + return -ENOENT; +} + +static int __hw_addr_add_multiple_ii(struct list_head *to_list, + struct list_head *from_list, + int addr_len, unsigned char addr_type, + int ignore_index) +{ + int err; + struct netdev_hw_addr *ha, *ha2; + unsigned char type; + + list_for_each_entry(ha, from_list, list) { + type = addr_type ? addr_type : ha->type; + err = __hw_addr_add(to_list, ha->addr, addr_len, type); + if (err) + goto unroll; + } + return 0; + +unroll: + list_for_each_entry(ha2, from_list, list) { + if (ha2 == ha) + break; + type = addr_type ? addr_type : ha2->type; + __hw_addr_del_ii(to_list, ha2->addr, addr_len, type, + ignore_index); + } + return err; +} + +static void __hw_addr_del_multiple_ii(struct list_head *to_list, + struct list_head *from_list, + int addr_len, unsigned char addr_type, + int ignore_index) +{ + struct netdev_hw_addr *ha; + unsigned char type; + + list_for_each_entry(ha, from_list, list) { + type = addr_type ? addr_type : ha->type; + __hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type, + ignore_index); + } +} + +static void __hw_addr_flush(struct list_head *list) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, list, list) { + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + } +} + +/* Device addresses handling functions */ + +static void dev_addr_flush(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + __hw_addr_flush(&dev->dev_addr_list); + dev->dev_addr = NULL; +} + +static int dev_addr_init(struct net_device *dev) +{ + unsigned char addr[MAX_ADDR_LEN]; + struct netdev_hw_addr *ha; + int err; + + /* rtnl_mutex must be held here */ + + INIT_LIST_HEAD(&dev->dev_addr_list); + memset(addr, 0, sizeof(*addr)); + err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr), + NETDEV_HW_ADDR_T_LAN); + if (!err) { + /* + * Get the first (previously created) address from the list + * and set dev_addr pointer to this location. + */ + ha = list_first_entry(&dev->dev_addr_list, + struct netdev_hw_addr, list); + dev->dev_addr = ha->addr; + } + return err; +} + +/** + * dev_addr_add - Add a device address + * @dev: device + * @addr: address to add + * @addr_type: address type + * + * Add a device address to the device or increase the reference count if + * it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len, + addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add); + +/** + * dev_addr_del - Release a device address. + * @dev: device + * @addr: address to delete + * @addr_type: address type + * + * Release reference to a device address and remove it from the device + * if the reference count drops to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_del(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len, + addr_type, 0); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_del); + +/** + * dev_addr_add_multiple - Add device addresses from another device + * @to_dev: device to which addresses will be added + * @from_dev: device from which addresses will be added + * @addr_type: address type - 0 means type will be used from from_dev + * + * Add device addresses of the one device to another. + ** + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type, 0); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add_multiple); + +/** + * dev_addr_del_multiple - Delete device addresses by another device + * @to_dev: device where the addresses will be deleted + * @from_dev: device by which addresses the addresses will be deleted + * @addr_type: address type - 0 means type will used from from_dev + * + * Deletes addresses in to device by the list of addresses in from device. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_del_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + __hw_addr_del_multiple_ii(&to_dev->dev_addr_list, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type, 0); + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return 0; +} +EXPORT_SYMBOL(dev_addr_del_multiple); + +/* unicast and multicast addresses handling functions */ + int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) { @@ -4776,6 +5022,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; + dev_addr_init(dev); netdev_init_queues(dev); INIT_LIST_HEAD(&dev->napi_list); @@ -4801,6 +5048,9 @@ void free_netdev(struct net_device *dev) kfree(dev->_tx); + /* Flush device addresses */ + dev_addr_flush(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); -- cgit v1.2.3-70-g09d2 From 2df75e415709ad12862028916c772c1f377f6a7c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 6 May 2009 10:33:04 +0800 Subject: tracing/events: fix memory leak when unloading module When unloading a module, memory allocated by init_preds() and trace_define_field() is not freed. [ Impact: fix memory leak ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4A00F6E0.3040503@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 1 + kernel/trace/trace_events.c | 18 ++++++++++++++++++ kernel/trace/trace_events_filter.c | 22 +++++++++++++++------- 3 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5fff40c9ff5..662c1becf36 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -116,6 +116,7 @@ struct ftrace_event_call { #define MAX_FILTER_STR_VAL 128 extern int init_preds(struct ftrace_event_call *call); +extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f789ca540fe..f251a150e75 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -60,6 +60,22 @@ err: } EXPORT_SYMBOL_GPL(trace_define_field); +#ifdef CONFIG_MODULES + +static void trace_destroy_fields(struct ftrace_event_call *call) +{ + struct ftrace_event_field *field, *next; + + list_for_each_entry_safe(field, next, &call->fields, link) { + list_del(&field->link); + kfree(field->type); + kfree(field->name); + kfree(field); + } +} + +#endif /* CONFIG_MODULES */ + static void ftrace_clear_events(void) { struct ftrace_event_call *call; @@ -925,6 +941,8 @@ static void trace_module_remove_events(struct module *mod) unregister_ftrace_event(call->event); debugfs_remove_recursive(call->dir); list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); } } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f49486687ee..ce07b818671 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -346,6 +346,20 @@ static void filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } +void destroy_preds(struct ftrace_event_call *call) +{ + struct event_filter *filter = call->filter; + int i; + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (filter->preds[i]) + filter_free_pred(filter->preds[i]); + } + kfree(filter->preds); + kfree(filter); + call->filter = NULL; +} + int init_preds(struct ftrace_event_call *call) { struct event_filter *filter; @@ -374,13 +388,7 @@ int init_preds(struct ftrace_event_call *call) return 0; oom: - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (filter->preds[i]) - filter_free_pred(filter->preds[i]); - } - kfree(filter->preds); - kfree(call->filter); - call->filter = NULL; + destroy_preds(call); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 22cfbbfd9f67b67fe073010f51cb71d3632387d5 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 6 May 2009 11:43:57 +0200 Subject: ntp: adjust SHIFT_PLL to improve NTP convergence The conversion to the ntpv4 reference model f19923937321244e7dc334767eb4b67e0e3d5c74 ("ntp: convert to the NTP4 reference model") in 2.6.19 added nanosecond resolution the adjtimex interface, but also changed the "stiffness" of the frequency adjustments, causing NTP convergence time to greatly increase. SHIFT_PLL, which reduces the stiffness of the freq adjustments, was designed to be inversely linked to HZ, and the reference value of 4 was designed for Unix systems using HZ=100. However Linux's clock steering code mostly independent of HZ. So this patch reduces the SHIFT_PLL value from 4 to 2, which causes NTPd behavior to match kernels prior to 2.6.19, greatly reducing convergence times, and improving close synchronization through environmental thermal changes. The patch also changes some l's to L's in nearby code to avoid misreading 50l as 501. [ Impact: tweak NTP algorithm for faster convergence ] Signed-off-by: John Stultz Acked-by: Rik van Riel Cc: zippel@linux-m68k.org Signed-off-by: Andrew Morton LKML-Reference: <200905051956.n45JuVo9025575@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- include/linux/timex.h | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index aa3475fcff6..0daf9611ef4 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -170,17 +170,37 @@ struct timex { #include /* - * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen - * for a slightly underdamped convergence characteristic. SHIFT_KH - * establishes the damping of the FLL and is chosen by wisdom and black - * art. + * SHIFT_PLL is used as a dampening factor to define how much we + * adjust the frequency correction for a given offset in PLL mode. + * It also used in dampening the offset correction, to define how + * much of the current value in time_offset we correct for each + * second. Changing this value changes the stiffness of the ntp + * adjustment code. A lower value makes it more flexible, reducing + * NTP convergence time. A higher value makes it stiffer, increasing + * convergence time, but making the clock more stable. * - * MAXTC establishes the maximum time constant of the PLL. With the - * SHIFT_KG and SHIFT_KF values given and a time constant range from - * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, - * respectively. + * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 4. + * However this seems to increase convergence time much too long. + * + * https://lists.ntp.org/pipermail/hackers/2008-January/003487.html + * + * In the above mailing list discussion, it seems the value of 4 + * was appropriate for other Unix systems with HZ=100, and that + * SHIFT_PLL should be decreased as HZ increases. However, Linux's + * clock steering implementation is HZ independent. + * + * Through experimentation, a SHIFT_PLL value of 2 was found to allow + * for fast convergence (very similar to the NTPv3 code used prior to + * v2.6.19), with good clock stability. + * + * + * SHIFT_FLL is used as a dampening factor to define how much we + * adjust the frequency correction for a given offset in FLL mode. + * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 2. + * + * MAXTC establishes the maximum time constant of the PLL. */ -#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ +#define SHIFT_PLL 2 /* PLL frequency factor (shift) */ #define SHIFT_FLL 2 /* FLL frequency factor (shift) */ #define MAXTC 10 /* maximum time constant (shift) */ @@ -192,10 +212,10 @@ struct timex { #define SHIFT_USEC 16 /* frequency offset scale (shift) */ #define PPM_SCALE ((s64)NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) #define PPM_SCALE_INV_SHIFT 19 -#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ +#define PPM_SCALE_INV ((1LL << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ PPM_SCALE + 1) -#define MAXPHASE 500000000l /* max phase error (ns) */ +#define MAXPHASE 500000000L /* max phase error (ns) */ #define MAXFREQ 500000 /* max frequency error (ns/s) */ #define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT) #define MINSEC 256 /* min interval between updates (s) */ -- cgit v1.2.3-70-g09d2 From de1d7286060430e79a1d50ad6e5fee8fe863c5f6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 May 2009 16:49:59 +0800 Subject: tracepoint: trace_sched_migrate_task(): remove parameter The orig_cpu parameter in trace_sched_migrate_task() is not necessary, it can be got by using task_cpu(p) in the probe. [ Impact: micro-optimization ] Signed-off-by: Mathieu Desnoyers [ modified from Mathieu's patch. The original patch is at: http://marc.info/?l=linux-kernel&m=123791201716239&w=2 ] Signed-off-by: Xiao Guangrong Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: Li Zefan Cc: zhaolei@cn.fujitsu.com Cc: laijs@cn.fujitsu.com LKML-Reference: <49FFFDB7.1050402@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 6 +++--- kernel/sched.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ffa1cab586b..dd4033cf5b0 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -180,9 +180,9 @@ TRACE_EVENT(sched_switch, */ TRACE_EVENT(sched_migrate_task, - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TP_PROTO(struct task_struct *p, int dest_cpu), - TP_ARGS(p, orig_cpu, dest_cpu), + TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -196,7 +196,7 @@ TRACE_EVENT(sched_migrate_task, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; + __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), diff --git a/kernel/sched.c b/kernel/sched.c index 9f7ffd00b6e..9cdedbd181c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1954,7 +1954,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) clock_offset = old_rq->clock - new_rq->clock; - trace_sched_migrate_task(p, task_cpu(p), new_cpu); + trace_sched_migrate_task(p, new_cpu); #ifdef CONFIG_SCHEDSTATS if (p->se.wait_start) -- cgit v1.2.3-70-g09d2 From a42aaa3bbce85ac487ad4fad5db99e8e91b7aac1 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:27:26 -0400 Subject: blktrace: correct remap names This attempts to clarify names utilized during block I/O remap operations (partition, volume manager). It correctly matches up the /from/ information for both device & sector. This takes in the concept from Kosaki Motohiro and extends it to include better naming for the "device_from" field. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF4FAE.3000301@hp.com> Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 4 ++-- include/trace/block.h | 4 ++-- kernel/trace/blktrace.c | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 62763c95285..82b4636030e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,9 +116,9 @@ struct blk_io_trace { * The remap event */ struct blk_io_trace_remap { - __be32 device; __be32 device_from; - __be64 sector; + __be32 device_to; + __be64 sector_from; }; enum { diff --git a/include/trace/block.h b/include/trace/block.h index 25b7068b819..87f6456fd32 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); + sector_t to, sector_t from), + TP_ARGS(q, bio, dev, to, from)); #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c32062bd10b..f8d46d6f5d3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -830,8 +830,8 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * @q: queue the io is for * @bio: the source bio * @dev: target device - * @from: source sector * @to: target sector + * @from: source sector * * Description: * Device mapper or raid target sometimes need to split a bio because @@ -839,7 +839,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * **/ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) + dev_t dev, sector_t to, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -847,9 +847,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, if (likely(!bt)) return; - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); @@ -1028,11 +1028,11 @@ static void get_pdu_remap(const struct trace_entry *ent, struct blk_io_trace_remap *r) { const struct blk_io_trace_remap *__r = pdu_start(ent); - __u64 sector = __r->sector; + __u64 sector_from = __r->sector_from; - r->device = be32_to_cpu(__r->device); r->device_from = be32_to_cpu(__r->device_from); - r->sector = be64_to_cpu(sector); + r->device_to = be32_to_cpu(__r->device_to); + r->sector_from = be64_to_cpu(sector_from); } typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); @@ -1148,13 +1148,13 @@ static int blk_log_with_error(struct trace_seq *s, static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) { - struct blk_io_trace_remap r = { .device = 0, }; + struct blk_io_trace_remap r = { .device_from = 0, }; get_pdu_remap(ent, &r); return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", - t_sector(ent), - t_sec(ent), MAJOR(r.device), MINOR(r.device), - (unsigned long long)r.sector); + t_sector(ent), t_sec(ent), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); } static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) -- cgit v1.2.3-70-g09d2 From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:35:08 -0400 Subject: blktrace: from-sector redundant in trace_block_remap Remove redundant from-sector parameter: it's /always/ the bio's sector passed in. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF517C.7000503@hp.com> Signed-off-by: Ingo Molnar --- block/blk-core.c | 5 ++--- drivers/md/dm.c | 3 +-- include/trace/block.h | 4 ++-- kernel/trace/blktrace.c | 8 ++++---- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1..a5f747a8312 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1275,7 +1275,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_bdev = bdev->bd_contains; trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, bio->bi_sector, + bdev->bd_dev, bio->bi_sector - p->start_sect); } } @@ -1444,8 +1444,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, - old_sector); + trace_block_remap(q, bio, old_dev, old_sector); trace_block_bio_queue(q, bio); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035b..b01514afb6b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -657,8 +657,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, - clone->bi_sector, sector); + tio->io->bio->bi_bdev->bd_dev, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/include/trace/block.h b/include/trace/block.h index 87f6456fd32..8ac945b7746 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t to, sector_t from), - TP_ARGS(q, bio, dev, to, from)); + sector_t to), + TP_ARGS(q, bio, dev, to)); #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index f8d46d6f5d3..e099f8cc1d1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -830,7 +830,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * @q: queue the io is for * @bio: the source bio * @dev: target device - * @to: target sector * @from: source sector * * Description: @@ -839,7 +838,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * **/ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t to, sector_t from) + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -851,8 +850,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector_from = cpu_to_be64(from); - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), + sizeof(r), &r); } /** -- cgit v1.2.3-70-g09d2 From 9ccebe6148bcb0aba2d89743df2ff182ced505ec Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2009 10:32:36 +0200 Subject: mac80211: rename max_sleep_interval to max_sleep_period Kalle points out that max_sleep_interval is somewhat confusing because the value is measured in beacon intervals, and not in TU. Rename it to max_sleep_period to be consistent with things like DTIM period that are also measured in beacon intervals. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++-- net/mac80211/mlme.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 446dbf75a1c..81d706d8522 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -559,7 +559,7 @@ enum ieee80211_conf_changed { * @beacon_int: beacon interval (TODO make interface config) * * @listen_interval: listen interval in units of beacon interval - * @max_sleep_interval: the maximum number of beacon intervals to sleep for + * @max_sleep_period: the maximum number of beacon intervals to sleep for * before checking the beacon for a TIM bit (managed mode only); this * value will be only achievable between DTIM frames, the hardware * needs to check for the multicast traffic bit in DTIM beacons. @@ -584,7 +584,7 @@ struct ieee80211_conf { int beacon_int; u32 flags; int power_level, dynamic_ps_timeout; - int max_sleep_interval; + int max_sleep_period; u16 listen_interval; bool radio_enabled; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2029b71eb87..f8925ca7c8f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -562,7 +562,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) maxslp = min_t(int, dtimper, latency / beaconint_us); - local->hw.conf.max_sleep_interval = maxslp; + local->hw.conf.max_sleep_period = maxslp; local->ps_sdata = found; } } else { -- cgit v1.2.3-70-g09d2 From 57c4d7b4c4986037be51476b8e3025d5ba18d8b8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2009 16:10:04 +0200 Subject: mac80211: clean up beacon interval settings We currently have two beacon interval configuration knobs: hw.conf.beacon_int and vif.bss_info.beacon_int. This is rather confusing, even though the former is used when we beacon ourselves and the latter when we are associated to an AP. This just deprecates the hw.conf.beacon_int setting in favour of always using vif.bss_info.beacon_int. Since it touches all the beaconing IBSS code anyway, we can also add support for the cfg80211 IBSS beacon interval configuration easily. NOTE: The hw.conf.beacon_int setting is retained for now due to drivers still using it -- I couldn't untangle all drivers, some are updated in this patch. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ar9170/mac.c | 6 +++--- drivers/net/wireless/ath/ar9170/main.c | 5 ++++- drivers/net/wireless/ath/ath5k/base.c | 5 ++++- drivers/net/wireless/ath/ath9k/ath9k.h | 2 ++ drivers/net/wireless/ath/ath9k/beacon.c | 9 +++------ drivers/net/wireless/ath/ath9k/main.c | 22 ++++++++++++---------- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- drivers/net/wireless/b43/main.c | 12 +++++++----- drivers/net/wireless/b43legacy/main.c | 10 +++++----- drivers/net/wireless/iwlwifi/iwl-agn.c | 3 ++- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +-- drivers/net/wireless/iwlwifi/iwl3945-base.c | 5 ++--- drivers/net/wireless/libertas_tf/main.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 20 ++++++++++++-------- include/net/mac80211.h | 14 ++++++++++++-- net/mac80211/cfg.c | 17 +++++------------ net/mac80211/ibss.c | 25 +++++++++++++++++-------- net/mac80211/main.c | 17 +++++++++++------ net/mac80211/mlme.c | 1 + net/mac80211/sta_info.c | 12 ++++++------ net/mac80211/tx.c | 2 +- 21 files changed, 112 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ar9170/mac.c b/drivers/net/wireless/ath/ar9170/mac.c index c8fa3073169..0e5967dd119 100644 --- a/drivers/net/wireless/ath/ar9170/mac.c +++ b/drivers/net/wireless/ath/ar9170/mac.c @@ -316,9 +316,9 @@ int ar9170_set_beacon_timers(struct ar9170 *ar) u32 v = 0; u32 pretbtt = 0; - v |= ar->hw->conf.beacon_int; - if (ar->vif) { + v |= ar->vif->bss_conf.beacon_int; + switch (ar->vif->type) { case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_ADHOC: @@ -326,7 +326,7 @@ int ar9170_set_beacon_timers(struct ar9170 *ar) break; case NL80211_IFTYPE_AP: v |= BIT(24); - pretbtt = (ar->hw->conf.beacon_int - 6) << 16; + pretbtt = (ar->vif->bss_conf.beacon_int - 6) << 16; break; default: break; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 1b60906b80c..49c729bc714 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1337,7 +1337,7 @@ static int ar9170_op_config(struct ieee80211_hw *hw, u32 changed) goto out; } - if (changed & IEEE80211_CONF_CHANGE_BEACON_INTERVAL) { + if (changed & BSS_CHANGED_BEACON_INT) { err = ar9170_set_beacon_timers(ar); if (err) goto out; @@ -1499,6 +1499,9 @@ static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw, #endif /* CONFIG_AR9170_LEDS */ } + if (changed & BSS_CHANGED_BEACON_INT) + err = ar9170_set_beacon_timers(ar); + if (changed & BSS_CHANGED_HT) { /* TODO */ err = 0; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index c8c658bfcf9..e4b1d9efa42 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2756,7 +2756,6 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed) mutex_lock(&sc->lock); - sc->bintval = conf->beacon_int; sc->power_level = conf->power_level; ret = ath5k_chan_set(sc, conf->channel); @@ -3083,6 +3082,10 @@ static void ath5k_bss_info_changed(struct ieee80211_hw *hw, u32 changes) { struct ath5k_softc *sc = hw->priv; + + if (changes & BSS_CHANGED_BEACON_INT) + sc->bintval = bss_conf->beacon_int; + if (changes & BSS_CHANGED_ASSOC) { mutex_lock(&sc->lock); sc->assoc = bss_conf->assoc; diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index c92d46fa9d5..d5084ddf44f 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -590,6 +590,8 @@ struct ath_softc { int led_on_cnt; int led_off_cnt; + int beacon_interval; + struct ath_rfkill rf_kill; struct ath_ani ani; struct ath9k_node_stats nodestats; diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index eb4759fc6a0..c17b8382e32 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -320,8 +320,7 @@ int ath_beacon_alloc(struct ath_wiphy *aphy, struct ieee80211_vif *vif) u64 tsfadjust; int intval; - intval = sc->hw->conf.beacon_int ? - sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL; + intval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL; /* * Calculate the TSF offset for this beacon slot, i.e., the @@ -431,8 +430,7 @@ void ath_beacon_tasklet(unsigned long data) * on the tsf to safeguard against missing an swba. */ - intval = sc->hw->conf.beacon_int ? - sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL; + intval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL; tsf = ath9k_hw_gettsf64(ah); tsftu = TSF_TO_TU(tsf>>32, tsf); @@ -711,8 +709,7 @@ void ath_beacon_config(struct ath_softc *sc, struct ieee80211_vif *vif) /* Setup the beacon configuration parameters */ memset(&conf, 0, sizeof(struct ath_beacon_config)); - conf.beacon_interval = sc->hw->conf.beacon_int ? - sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL; + conf.beacon_interval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL; conf.listen_interval = 1; conf.dtim_period = conf.beacon_interval; conf.dtim_count = 1; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 2398d4f45f2..28cc9cd52f3 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2358,16 +2358,6 @@ skip_chan_change: if (changed & IEEE80211_CONF_CHANGE_POWER) sc->config.txpowlimit = 2 * conf->power_level; - /* - * The HW TSF has to be reset when the beacon interval changes. - * We set the flag here, and ath_beacon_config_ap() would take this - * into account when it gets called through the subsequent - * config_interface() call - with IFCC_BEACON in the changed field. - */ - - if (changed & IEEE80211_CONF_CHANGE_BEACON_INTERVAL) - sc->sc_flags |= SC_OP_TSF_RESET; - mutex_unlock(&sc->mutex); return 0; @@ -2635,6 +2625,18 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw, ath9k_bss_assoc_info(sc, vif, bss_conf); } + /* + * The HW TSF has to be reset when the beacon interval changes. + * We set the flag here, and ath_beacon_config_ap() would take this + * into account when it gets called through the subsequent + * config_interface() call - with IFCC_BEACON in the changed field. + */ + + if (changed & BSS_CHANGED_BEACON_INT) { + sc->sc_flags |= SC_OP_TSF_RESET; + sc->beacon_interval = bss_conf->beacon_int; + } + mutex_unlock(&sc->mutex); } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index faf2cab49ea..501493ffc3a 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -971,7 +971,7 @@ int ath_cabq_update(struct ath_softc *sc) else if (sc->config.cabqReadytime > ATH9K_READY_TIME_HI_BOUND) sc->config.cabqReadytime = ATH9K_READY_TIME_HI_BOUND; - qi.tqi_readyTime = (sc->hw->conf.beacon_int * + qi.tqi_readyTime = (sc->beacon_interval * sc->config.cabqReadytime) / 100; ath_txq_update(sc, qnum, &qi); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index a97c6ff0f12..3c693e6ec90 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3468,11 +3468,6 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) if (phy->ops->set_rx_antenna) phy->ops->set_rx_antenna(dev, antenna); - /* Update templates for AP/mesh mode. */ - if (b43_is_mode(wl, NL80211_IFTYPE_AP) || - b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) - b43_set_beacon_int(dev, conf->beacon_int); - if (!!conf->radio_enabled != phy->radio_on) { if (conf->radio_enabled) { b43_software_rfkill(dev, RFKILL_STATE_UNBLOCKED); @@ -3556,6 +3551,13 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw, goto out_unlock_mutex; b43_mac_suspend(dev); + /* Update templates for AP/mesh mode. */ + if (changed & BSS_CHANGED_BEACON_INT && + (b43_is_mode(wl, NL80211_IFTYPE_AP) || + b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT) || + b43_is_mode(wl, NL80211_IFTYPE_ADHOC))) + b43_set_beacon_int(dev, conf->beacon_int); + if (changed & BSS_CHANGED_BASIC_RATES) b43_update_basic_rates(dev, conf->basic_rates); diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index ee202b4f77b..276f314688f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2721,11 +2721,6 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, /* Antennas for RX and management frame TX. */ b43legacy_mgmtframe_txantenna(dev, antenna_tx); - /* Update templates for AP mode. */ - if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) - b43legacy_set_beacon_int(dev, conf->beacon_int); - - if (!!conf->radio_enabled != phy->radio_on) { if (conf->radio_enabled) { b43legacy_radio_turn_on(dev); @@ -2827,6 +2822,11 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, b43legacy_mac_suspend(dev); + if (changed & BSS_CHANGED_BEACON_INT && + (b43legacy_is_mode(wl, NL80211_IFTYPE_AP) || + b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC))) + b43legacy_set_beacon_int(dev, conf->beacon_int); + if (changed & BSS_CHANGED_BASIC_RATES) b43legacy_update_basic_rates(dev, conf->basic_rates); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 277dfc57fde..4920dcf7d7b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -567,7 +567,8 @@ static void iwl_setup_rxon_timing(struct iwl_priv *priv) beacon_int = iwl_adjust_beacon_interval(priv->beacon_int); priv->rxon_timing.atim_window = 0; } else { - beacon_int = iwl_adjust_beacon_interval(conf->beacon_int); + beacon_int = iwl_adjust_beacon_interval( + priv->vif->bss_conf.beacon_int); /* TODO: we need to get atim_window from upper stack * for now we set to 0 */ diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 3dec2d25fa3..cd8a5ed97c4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1313,7 +1313,6 @@ int iwl_setup_mac(struct iwl_priv *priv) /* Default value; 4 EDCA QOS priorities */ hw->queues = 4; - hw->conf.beacon_int = 100; hw->max_listen_interval = IWL_CONN_MAX_LISTEN_INTERVAL; if (priv->bands[IEEE80211_BAND_2GHZ].n_channels) @@ -2751,7 +2750,7 @@ void iwl_mac_reset_tsf(struct ieee80211_hw *hw) priv->ibss_beacon = NULL; - priv->beacon_int = priv->hw->conf.beacon_int; + priv->beacon_int = priv->vif->bss_conf.beacon_int; priv->timestamp = 0; if ((priv->iw_mode == NL80211_IFTYPE_STATION)) priv->beacon_int = 0; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index a782292ed43..f9d9b65ef30 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -551,7 +551,8 @@ static void iwl3945_setup_rxon_timing(struct iwl_priv *priv) priv->rxon_timing.atim_window = 0; } else { priv->rxon_timing.beacon_interval = - iwl3945_adjust_beacon_interval(conf->beacon_int); + iwl3945_adjust_beacon_interval( + priv->vif->bss_conf.beacon_int); /* TODO: we need to get atim_window from upper stack * for now we set to 0 */ priv->rxon_timing.atim_window = 0; @@ -4211,8 +4212,6 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) /* Default value; 4 EDCA QOS priorities */ hw->queues = 4; - hw->conf.beacon_int = 100; - if (priv->bands[IEEE80211_BAND_2GHZ].n_channels) priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->bands[IEEE80211_BAND_2GHZ]; diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index e7289e2e7f1..68e47fb47a1 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -380,7 +380,7 @@ static int lbtf_op_config_interface(struct ieee80211_hw *hw, if (beacon) { lbtf_beacon_set(priv, beacon); kfree_skb(beacon); - lbtf_beacon_ctrl(priv, 1, hw->conf.beacon_int); + lbtf_beacon_ctrl(priv, 1, vif->bss_conf.beacon_int); } break; default: diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d4fdc8b7d7d..24c95a619e4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -553,18 +553,13 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) struct mac80211_hwsim_data *data = hw->priv; struct ieee80211_conf *conf = &hw->conf; - printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d beacon_int=%d)\n", + printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d)\n", wiphy_name(hw->wiphy), __func__, - conf->channel->center_freq, conf->radio_enabled, - conf->beacon_int); + conf->channel->center_freq, conf->radio_enabled); data->channel = conf->channel; data->radio_enabled = conf->radio_enabled; - data->beacon_int = 1024 * conf->beacon_int / 1000 * HZ / 1000; - if (data->beacon_int < 1) - data->beacon_int = 1; - - if (!data->started || !data->radio_enabled) + if (!data->started || !data->radio_enabled || !data->beacon_int) del_timer(&data->beacon_timer); else mod_timer(&data->beacon_timer, jiffies + data->beacon_int); @@ -615,6 +610,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, u32 changed) { struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + struct mac80211_hwsim_data *data = hw->priv; hwsim_check_magic(vif); @@ -628,6 +624,14 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, vp->aid = info->aid; } + if (changed & BSS_CHANGED_BEACON_INT) { + printk(KERN_DEBUG " %s: BCNINT: %d\n", + wiphy_name(hw->wiphy), info->beacon_int); + data->beacon_int = 1024 * info->beacon_int / 1000 * HZ / 1000; + if (WARN_ON(data->beacon_int)) + data->beacon_int = 1; + } + if (changed & BSS_CHANGED_ERP_CTS_PROT) { printk(KERN_DEBUG " %s: ERP_CTS_PROT: %d\n", wiphy_name(hw->wiphy), info->use_cts_prot); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 81d706d8522..22c65e8cbb7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ERP_SLOT: slot timing changed * @BSS_CHANGED_HT: 802.11n parameters changed * @BSS_CHANGED_BASIC_RATES: Basic rateset changed + * @BSS_CHANGED_BEACON_INT: Beacon interval changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -157,6 +158,7 @@ enum ieee80211_bss_change { BSS_CHANGED_ERP_SLOT = 1<<3, BSS_CHANGED_HT = 1<<4, BSS_CHANGED_BASIC_RATES = 1<<5, + BSS_CHANGED_BEACON_INT = 1<<6, }; /** @@ -529,7 +531,7 @@ enum ieee80211_conf_flags { * enum ieee80211_conf_changed - denotes which configuration changed * * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed - * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed + * @_IEEE80211_CONF_CHANGE_BEACON_INTERVAL: DEPRECATED * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed @@ -539,7 +541,7 @@ enum ieee80211_conf_flags { */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), - IEEE80211_CONF_CHANGE_BEACON_INTERVAL = BIT(1), + _IEEE80211_CONF_CHANGE_BEACON_INTERVAL = BIT(1), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -548,6 +550,14 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), }; +static inline __deprecated enum ieee80211_conf_changed +__IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void) +{ + return _IEEE80211_CONF_CHANGE_BEACON_INTERVAL; +} +#define IEEE80211_CONF_CHANGE_BEACON_INTERVAL \ + __IEEE80211_CONF_CHANGE_BEACON_INTERVAL() + /** * struct ieee80211_conf - configuration of the device * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5e1c230744b..a898ccd3f2c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -451,18 +451,11 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, * This is a kludge. beacon interval should really be part * of the beacon information. */ - if (params->interval && (sdata->local->hw.conf.beacon_int != - params->interval)) { - sdata->local->hw.conf.beacon_int = params->interval; - err = ieee80211_hw_config(sdata->local, - IEEE80211_CONF_CHANGE_BEACON_INTERVAL); - if (err < 0) - return err; - /* - * We updated some parameter so if below bails out - * it's not an error. - */ - err = 0; + if (params->interval && + (sdata->vif.bss_conf.beacon_int != params->interval)) { + sdata->vif.bss_conf.beacon_int = params->interval; + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_BEACON_INT); } /* Need to have a beacon head if we don't have one yet */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 25ff583612e..f4879dad3cd 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -73,6 +73,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; + u32 bss_change; if (local->ops->reset_tsf) { /* Reset own TSF to allow time synchronization work. */ @@ -92,8 +93,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memcpy(ifibss->bssid, bssid, ETH_ALEN); - local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; - sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); @@ -101,6 +100,12 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, local->oper_channel = chan; local->oper_channel_type = NL80211_CHAN_NO_HT; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + + sdata->vif.bss_conf.beacon_int = beacon_int; + bss_change = BSS_CHANGED_BEACON_INT; + bss_change |= ieee80211_reset_erp_info(sdata); + ieee80211_bss_info_change_notify(sdata, bss_change); + sband = local->hw.wiphy->bands[chan->band]; /* Build IBSS probe response */ @@ -111,7 +116,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); - mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int); mgmt->u.beacon.timestamp = cpu_to_le64(tsf); mgmt->u.beacon.capab_info = cpu_to_le16(capability); @@ -181,8 +186,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { + u16 beacon_int = bss->cbss.beacon_interval; + + if (beacon_int < 10) + beacon_int = 10; + __ieee80211_sta_join_ibss(sdata, bss->cbss.bssid, - bss->cbss.beacon_interval, + beacon_int, bss->cbss.channel, bss->supp_rates_len, bss->supp_rates, bss->cbss.capability, @@ -464,9 +474,6 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sband = local->hw.wiphy->bands[ifibss->channel->band]; - if (local->hw.conf.beacon_int == 0) - local->hw.conf.beacon_int = 100; - capability = WLAN_CAPABILITY_IBSS; if (sdata->default_key) @@ -480,7 +487,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) *pos++ = (u8) (rate / 5); } - __ieee80211_sta_join_ibss(sdata, bssid, local->hw.conf.beacon_int, + __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, ifibss->channel, sband->n_bitrates, supp_rates, capability, 0); } @@ -823,6 +830,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, } else sdata->u.ibss.fixed_bssid = false; + sdata->vif.bss_conf.beacon_int = params->beacon_interval; + sdata->u.ibss.channel = params->channel; sdata->u.ibss.fixed_channel = params->channel_fixed; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e00d124e4ef..b254879d863 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -294,9 +294,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; - if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) - return; - if (!changed) return; @@ -305,6 +302,17 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, &sdata->vif, &sdata->vif.bss_conf, changed); + + /* + * DEPRECATED + * + * ~changed is just there to not do this at resume time + */ + if (changed & BSS_CHANGED_BEACON_INT && ~changed) { + local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int; + ieee80211_hw_config(local, + _IEEE80211_CONF_CHANGE_BEACON_INTERVAL); + } } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) @@ -971,9 +979,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_add(local); - if (local->hw.conf.beacon_int < 10) - local->hw.conf.beacon_int = 100; - if (local->hw.max_listen_interval == 0) local->hw.max_listen_interval = 1; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a2f5e622305..bfd571e6f22 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -842,6 +842,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.timestamp = bss->cbss.tsf; sdata->vif.bss_conf.dtim_period = bss->dtim_period; + bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss->cbss.capability, bss->has_erp_value, bss->erp_value); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 654a8e963cc..7116220d06b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -543,9 +543,8 @@ void sta_info_unlink(struct sta_info **sta) spin_unlock_irqrestore(&local->sta_lock, flags); } -static inline int sta_info_buffer_expired(struct ieee80211_local *local, - struct sta_info *sta, - struct sk_buff *skb) +static int sta_info_buffer_expired(struct sta_info *sta, + struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; @@ -556,8 +555,9 @@ static inline int sta_info_buffer_expired(struct ieee80211_local *local, info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ - timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 / - 15625) * HZ; + timeout = (sta->listen_interval * + sta->sdata->vif.bss_conf.beacon_int * + 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; return time_after(jiffies, info->control.jiffies + timeout); @@ -577,7 +577,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, for (;;) { spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); skb = skb_peek(&sta->ps_tx_buf); - if (sta_info_buffer_expired(local, sta, skb)) + if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->ps_tx_buf); else skb = NULL; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1865622003c..29df65045fc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2132,7 +2132,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.beacon.beacon_int = - cpu_to_le16(local->hw.conf.beacon_int); + cpu_to_le16(sdata->vif.bss_conf.beacon_int); mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */ pos = skb_put(skb, 2); -- cgit v1.2.3-70-g09d2 From 2d0ddec5b2b859f06116f631fc0ffe94fbceb556 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2009 16:13:26 +0200 Subject: mac80211: unify config_interface and bss_info_changed The config_interface method is a little strange, it contains the BSSID and beacon updates, while bss_info_changed contains most other BSS information for each interface. This patch removes config_interface and rolls all the information it previously passed to drivers into bss_info_changed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 14 ++- drivers/net/wireless/at76c50x-usb.c | 15 ++- drivers/net/wireless/ath/ar9170/main.c | 39 ++---- drivers/net/wireless/ath/ath5k/base.c | 71 +++++------ drivers/net/wireless/ath/ath9k/main.c | 182 +++++++++++++--------------- drivers/net/wireless/b43/main.c | 64 ++++------ drivers/net/wireless/b43legacy/main.c | 63 ++++------ drivers/net/wireless/iwlwifi/iwl-agn.c | 1 - drivers/net/wireless/iwlwifi/iwl-core.c | 162 +++++++++---------------- drivers/net/wireless/iwlwifi/iwl-core.h | 3 - drivers/net/wireless/iwlwifi/iwl3945-base.c | 1 - drivers/net/wireless/libertas_tf/main.c | 56 ++++----- drivers/net/wireless/mac80211_hwsim.c | 25 ++-- drivers/net/wireless/mwl8k.c | 18 +-- drivers/net/wireless/p54/p54common.c | 63 ++++------ drivers/net/wireless/rt2x00/rt2400pci.c | 1 - drivers/net/wireless/rt2x00/rt2500pci.c | 1 - drivers/net/wireless/rt2x00/rt2500usb.c | 1 - drivers/net/wireless/rt2x00/rt2x00.h | 3 - drivers/net/wireless/rt2x00/rt2x00mac.c | 88 ++++++-------- drivers/net/wireless/rt2x00/rt61pci.c | 1 - drivers/net/wireless/rt2x00/rt73usb.c | 1 - drivers/net/wireless/rtl818x/rtl8180_dev.c | 33 +++-- drivers/net/wireless/rtl818x/rtl8187_dev.c | 48 ++++---- drivers/net/wireless/zd1211rw/zd_mac.c | 80 +++++------- include/net/mac80211.h | 51 ++------ net/mac80211/cfg.c | 8 +- net/mac80211/ibss.c | 18 ++- net/mac80211/ieee80211_i.h | 1 - net/mac80211/main.c | 131 +++++++++----------- net/mac80211/mesh.c | 6 +- net/mac80211/mlme.c | 8 +- net/mac80211/scan.c | 8 +- net/mac80211/util.c | 17 +-- 34 files changed, 504 insertions(+), 778 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index f7182179501..2b9e379994a 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1311,18 +1311,20 @@ static int adm8211_config(struct ieee80211_hw *dev, u32 changed) return 0; } -static int adm8211_config_interface(struct ieee80211_hw *dev, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) +static void adm8211_bss_info_changed(struct ieee80211_hw *dev, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *conf, + u32 changes) { struct adm8211_priv *priv = dev->priv; + if (!(changes & BSS_CHANGED_BSSID)) + return; + if (memcmp(conf->bssid, priv->bssid, ETH_ALEN)) { adm8211_set_bssid(dev, conf->bssid); memcpy(priv->bssid, conf->bssid, ETH_ALEN); } - - return 0; } static void adm8211_configure_filter(struct ieee80211_hw *dev, @@ -1753,7 +1755,7 @@ static const struct ieee80211_ops adm8211_ops = { .add_interface = adm8211_add_interface, .remove_interface = adm8211_remove_interface, .config = adm8211_config, - .config_interface = adm8211_config_interface, + .bss_info_changed = adm8211_bss_info_changed, .configure_filter = adm8211_configure_filter, .get_stats = adm8211_get_stats, .get_tx_stats = adm8211_get_tx_stats, diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 55f947ac56d..cea7f1466c5 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1965,13 +1965,18 @@ static int at76_config(struct ieee80211_hw *hw, u32 changed) return 0; } -static int at76_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) +static void at76_bss_info_changed(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *conf, + u32 changed) { struct at76_priv *priv = hw->priv; at76_dbg(DBG_MAC80211, "%s():", __func__); + + if (!(changed & BSS_CHANGED_BSSID)) + return; + at76_dbg_dump(DBG_MAC80211, conf->bssid, ETH_ALEN, "bssid:"); mutex_lock(&priv->mtx); @@ -1983,8 +1988,6 @@ static int at76_config_interface(struct ieee80211_hw *hw, at76_join(priv); mutex_unlock(&priv->mtx); - - return 0; } /* must be atomic */ @@ -2076,7 +2079,7 @@ static const struct ieee80211_ops at76_ops = { .add_interface = at76_add_interface, .remove_interface = at76_remove_interface, .config = at76_config, - .config_interface = at76_config_interface, + .bss_info_changed = at76_bss_info_changed, .configure_filter = at76_configure_filter, .start = at76_mac80211_start, .stop = at76_mac80211_stop, diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 49c729bc714..3bd3a61225c 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1360,33 +1360,6 @@ out: return err; } -static int ar9170_op_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct ar9170 *ar = hw->priv; - int err = 0; - - mutex_lock(&ar->mutex); - - if (conf->changed & IEEE80211_IFCC_BSSID) { - memcpy(ar->bssid, conf->bssid, ETH_ALEN); - err = ar9170_set_operating_mode(ar); - } - - if (conf->changed & IEEE80211_IFCC_BEACON) { - err = ar9170_update_beacon(ar); - - if (err) - goto out; - err = ar9170_set_beacon_timers(ar); - } - -out: - mutex_unlock(&ar->mutex); - return err; -} - static void ar9170_set_filters(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, @@ -1488,6 +1461,17 @@ static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw, mutex_lock(&ar->mutex); + if (changed & BSS_CHANGED_BSSID) { + memcpy(ar->bssid, bss_conf->bssid, ETH_ALEN); + err = ar9170_set_operating_mode(ar); + } + + if (changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED)) { + err = ar9170_update_beacon(ar); + if (!err) + ar9170_set_beacon_timers(ar); + } + ar9170_regwrite_begin(ar); if (changed & BSS_CHANGED_ASSOC) { @@ -1796,7 +1780,6 @@ static const struct ieee80211_ops ar9170_ops = { .add_interface = ar9170_op_add_interface, .remove_interface = ar9170_op_remove_interface, .config = ar9170_op_config, - .config_interface = ar9170_op_config_interface, .configure_filter = ar9170_op_configure_filter, .conf_tx = ar9170_conf_tx, .bss_info_changed = ar9170_op_bss_info_changed, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index e4b1d9efa42..410fc4cfc4c 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -227,9 +227,6 @@ static int ath5k_add_interface(struct ieee80211_hw *hw, static void ath5k_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); static int ath5k_config(struct ieee80211_hw *hw, u32 changed); -static int ath5k_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf); static void ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, @@ -259,7 +256,6 @@ static const struct ieee80211_ops ath5k_hw_ops = { .add_interface = ath5k_add_interface, .remove_interface = ath5k_remove_interface, .config = ath5k_config, - .config_interface = ath5k_config_interface, .configure_filter = ath5k_configure_filter, .set_key = ath5k_set_key, .get_stats = ath5k_get_stats, @@ -2764,44 +2760,6 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed) return ret; } -static int -ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct ath5k_softc *sc = hw->priv; - struct ath5k_hw *ah = sc->ah; - int ret = 0; - - mutex_lock(&sc->lock); - if (sc->vif != vif) { - ret = -EIO; - goto unlock; - } - if (conf->changed & IEEE80211_IFCC_BSSID && conf->bssid) { - /* Cache for later use during resets */ - memcpy(ah->ah_bssid, conf->bssid, ETH_ALEN); - /* XXX: assoc id is set to 0 for now, mac80211 doesn't have - * a clean way of letting us retrieve this yet. */ - ath5k_hw_set_associd(ah, ah->ah_bssid, 0); - mmiowb(); - } - if (conf->changed & IEEE80211_IFCC_BEACON && - (vif->type == NL80211_IFTYPE_ADHOC || - vif->type == NL80211_IFTYPE_MESH_POINT || - vif->type == NL80211_IFTYPE_AP)) { - struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); - if (!beacon) { - ret = -ENOMEM; - goto unlock; - } - ath5k_beacon_update(sc, beacon); - } - -unlock: - mutex_unlock(&sc->lock); - return ret; -} - #define SUPPORTED_FIF_FLAGS \ FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | \ FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \ @@ -3082,15 +3040,40 @@ static void ath5k_bss_info_changed(struct ieee80211_hw *hw, u32 changes) { struct ath5k_softc *sc = hw->priv; + struct ath5k_hw *ah = sc->ah; + + mutex_lock(&sc->lock); + if (WARN_ON(sc->vif != vif)) + goto unlock; + + if (changes & BSS_CHANGED_BSSID) { + /* Cache for later use during resets */ + memcpy(ah->ah_bssid, bss_conf->bssid, ETH_ALEN); + /* XXX: assoc id is set to 0 for now, mac80211 doesn't have + * a clean way of letting us retrieve this yet. */ + ath5k_hw_set_associd(ah, ah->ah_bssid, 0); + mmiowb(); + } if (changes & BSS_CHANGED_BEACON_INT) sc->bintval = bss_conf->beacon_int; if (changes & BSS_CHANGED_ASSOC) { - mutex_lock(&sc->lock); sc->assoc = bss_conf->assoc; if (sc->opmode == NL80211_IFTYPE_STATION) set_beacon_filter(hw, sc->assoc); - mutex_unlock(&sc->lock); } + + if (changes & BSS_CHANGED_BEACON && + (vif->type == NL80211_IFTYPE_ADHOC || + vif->type == NL80211_IFTYPE_MESH_POINT || + vif->type == NL80211_IFTYPE_AP)) { + struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); + + if (beacon) + ath5k_beacon_update(sc, beacon); + } + + unlock: + mutex_unlock(&sc->lock); } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 28cc9cd52f3..d3dc8e2c77b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2363,104 +2363,6 @@ skip_chan_change: return 0; } -static int ath9k_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct ath_wiphy *aphy = hw->priv; - struct ath_softc *sc = aphy->sc; - struct ath_hw *ah = sc->sc_ah; - struct ath_vif *avp = (void *)vif->drv_priv; - u32 rfilt = 0; - int error, i; - - mutex_lock(&sc->mutex); - - /* TODO: Need to decide which hw opmode to use for multi-interface - * cases */ - if (vif->type == NL80211_IFTYPE_AP && - ah->opmode != NL80211_IFTYPE_AP) { - ah->opmode = NL80211_IFTYPE_STATION; - ath9k_hw_setopmode(ah); - memcpy(sc->curbssid, sc->sc_ah->macaddr, ETH_ALEN); - sc->curaid = 0; - ath9k_hw_write_associd(sc); - /* Request full reset to get hw opmode changed properly */ - sc->sc_flags |= SC_OP_FULL_RESET; - } - - if ((conf->changed & IEEE80211_IFCC_BSSID) && - !is_zero_ether_addr(conf->bssid)) { - switch (vif->type) { - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - /* Set BSSID */ - memcpy(sc->curbssid, conf->bssid, ETH_ALEN); - memcpy(avp->bssid, conf->bssid, ETH_ALEN); - sc->curaid = 0; - ath9k_hw_write_associd(sc); - - /* Set aggregation protection mode parameters */ - sc->config.ath_aggr_prot = 0; - - DPRINTF(sc, ATH_DBG_CONFIG, - "RX filter 0x%x bssid %pM aid 0x%x\n", - rfilt, sc->curbssid, sc->curaid); - - /* need to reconfigure the beacon */ - sc->sc_flags &= ~SC_OP_BEACONS ; - - break; - default: - break; - } - } - - if ((vif->type == NL80211_IFTYPE_ADHOC) || - (vif->type == NL80211_IFTYPE_AP) || - (vif->type == NL80211_IFTYPE_MESH_POINT)) { - if ((conf->changed & IEEE80211_IFCC_BEACON) || - (conf->changed & IEEE80211_IFCC_BEACON_ENABLED && - conf->enable_beacon)) { - /* - * Allocate and setup the beacon frame. - * - * Stop any previous beacon DMA. This may be - * necessary, for example, when an ibss merge - * causes reconfiguration; we may be called - * with beacon transmission active. - */ - ath9k_hw_stoptxdma(sc->sc_ah, sc->beacon.beaconq); - - error = ath_beacon_alloc(aphy, vif); - if (error != 0) { - mutex_unlock(&sc->mutex); - return error; - } - - ath_beacon_config(sc, vif); - } - } - - /* Check for WLAN_CAPABILITY_PRIVACY ? */ - if ((avp->av_opmode != NL80211_IFTYPE_STATION)) { - for (i = 0; i < IEEE80211_WEP_NKID; i++) - if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i)) - ath9k_hw_keysetmac(sc->sc_ah, - (u16)i, - sc->curbssid); - } - - /* Only legacy IBSS for now */ - if (vif->type == NL80211_IFTYPE_ADHOC) - ath_update_chainmask(sc, 0); - - mutex_unlock(&sc->mutex); - - return 0; -} - #define SUPPORTED_FILTERS \ (FIF_PROMISC_IN_BSS | \ FIF_ALLMULTI | \ @@ -2597,9 +2499,92 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw, { struct ath_wiphy *aphy = hw->priv; struct ath_softc *sc = aphy->sc; + struct ath_hw *ah = sc->sc_ah; + struct ath_vif *avp = (void *)vif->drv_priv; + u32 rfilt = 0; + int error, i; mutex_lock(&sc->mutex); + /* + * TODO: Need to decide which hw opmode to use for + * multi-interface cases + * XXX: This belongs into add_interface! + */ + if (vif->type == NL80211_IFTYPE_AP && + ah->opmode != NL80211_IFTYPE_AP) { + ah->opmode = NL80211_IFTYPE_STATION; + ath9k_hw_setopmode(ah); + memcpy(sc->curbssid, sc->sc_ah->macaddr, ETH_ALEN); + sc->curaid = 0; + ath9k_hw_write_associd(sc); + /* Request full reset to get hw opmode changed properly */ + sc->sc_flags |= SC_OP_FULL_RESET; + } + + if ((changed & BSS_CHANGED_BSSID) && + !is_zero_ether_addr(bss_conf->bssid)) { + switch (vif->type) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + /* Set BSSID */ + memcpy(sc->curbssid, bss_conf->bssid, ETH_ALEN); + memcpy(avp->bssid, bss_conf->bssid, ETH_ALEN); + sc->curaid = 0; + ath9k_hw_write_associd(sc); + + /* Set aggregation protection mode parameters */ + sc->config.ath_aggr_prot = 0; + + DPRINTF(sc, ATH_DBG_CONFIG, + "RX filter 0x%x bssid %pM aid 0x%x\n", + rfilt, sc->curbssid, sc->curaid); + + /* need to reconfigure the beacon */ + sc->sc_flags &= ~SC_OP_BEACONS ; + + break; + default: + break; + } + } + + if ((vif->type == NL80211_IFTYPE_ADHOC) || + (vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_MESH_POINT)) { + if ((changed & BSS_CHANGED_BEACON) || + (changed & BSS_CHANGED_BEACON_ENABLED && + bss_conf->enable_beacon)) { + /* + * Allocate and setup the beacon frame. + * + * Stop any previous beacon DMA. This may be + * necessary, for example, when an ibss merge + * causes reconfiguration; we may be called + * with beacon transmission active. + */ + ath9k_hw_stoptxdma(sc->sc_ah, sc->beacon.beaconq); + + error = ath_beacon_alloc(aphy, vif); + if (!error) + ath_beacon_config(sc, vif); + } + } + + /* Check for WLAN_CAPABILITY_PRIVACY ? */ + if ((avp->av_opmode != NL80211_IFTYPE_STATION)) { + for (i = 0; i < IEEE80211_WEP_NKID; i++) + if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i)) + ath9k_hw_keysetmac(sc->sc_ah, + (u16)i, + sc->curbssid); + } + + /* Only legacy IBSS for now */ + if (vif->type == NL80211_IFTYPE_ADHOC) + ath_update_chainmask(sc, 0); + if (changed & BSS_CHANGED_ERP_PREAMBLE) { DPRINTF(sc, ATH_DBG_CONFIG, "BSS Changed PREAMBLE %d\n", bss_conf->use_short_preamble); @@ -2757,7 +2742,6 @@ struct ieee80211_ops ath9k_ops = { .add_interface = ath9k_add_interface, .remove_interface = ath9k_remove_interface, .config = ath9k_config, - .config_interface = ath9k_config_interface, .configure_filter = ath9k_configure_filter, .sta_notify = ath9k_sta_notify, .conf_tx = ath9k_conf_tx, diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 3c693e6ec90..2615aaf7df6 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3543,12 +3543,38 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw, { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev; + unsigned long flags; mutex_lock(&wl->mutex); dev = wl->current_dev; if (!dev || b43_status(dev) < B43_STAT_STARTED) goto out_unlock_mutex; + + B43_WARN_ON(wl->vif != vif); + + if (changed & BSS_CHANGED_BSSID) { + spin_lock_irqsave(&wl->irq_lock, flags); + if (conf->bssid) + memcpy(wl->bssid, conf->bssid, ETH_ALEN); + else + memset(wl->bssid, 0, ETH_ALEN); + + if (b43_status(dev) >= B43_STAT_INITIALIZED) { + if (b43_is_mode(wl, NL80211_IFTYPE_AP) || + b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) { + B43_WARN_ON(vif->type != wl->if_type); + if (changed & BSS_CHANGED_BEACON) + b43_update_templates(wl); + } else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) { + if (changed & BSS_CHANGED_BEACON) + b43_update_templates(wl); + } + b43_write_mac_bssid_templates(dev); + } + spin_unlock_irqrestore(&wl->irq_lock, flags); + } + b43_mac_suspend(dev); /* Update templates for AP/mesh mode. */ @@ -3571,8 +3597,6 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw, b43_mac_enable(dev); out_unlock_mutex: mutex_unlock(&wl->mutex); - - return; } static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, @@ -3730,41 +3754,6 @@ static void b43_op_configure_filter(struct ieee80211_hw *hw, spin_unlock_irqrestore(&wl->irq_lock, flags); } -static int b43_op_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct b43_wl *wl = hw_to_b43_wl(hw); - struct b43_wldev *dev = wl->current_dev; - unsigned long flags; - - if (!dev) - return -ENODEV; - mutex_lock(&wl->mutex); - spin_lock_irqsave(&wl->irq_lock, flags); - B43_WARN_ON(wl->vif != vif); - if (conf->bssid) - memcpy(wl->bssid, conf->bssid, ETH_ALEN); - else - memset(wl->bssid, 0, ETH_ALEN); - if (b43_status(dev) >= B43_STAT_INITIALIZED) { - if (b43_is_mode(wl, NL80211_IFTYPE_AP) || - b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) { - B43_WARN_ON(vif->type != wl->if_type); - if (conf->changed & IEEE80211_IFCC_BEACON) - b43_update_templates(wl); - } else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) { - if (conf->changed & IEEE80211_IFCC_BEACON) - b43_update_templates(wl); - } - b43_write_mac_bssid_templates(dev); - } - spin_unlock_irqrestore(&wl->irq_lock, flags); - mutex_unlock(&wl->mutex); - - return 0; -} - /* Locking: wl->mutex */ static void b43_wireless_core_stop(struct b43_wldev *dev) { @@ -4434,7 +4423,6 @@ static const struct ieee80211_ops b43_hw_ops = { .remove_interface = b43_op_remove_interface, .config = b43_op_config, .bss_info_changed = b43_op_bss_info_changed, - .config_interface = b43_op_config_interface, .configure_filter = b43_op_configure_filter, .set_key = b43_op_set_key, .get_stats = b43_op_get_stats, diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 276f314688f..07c7898c87a 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2804,6 +2804,7 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, u32 savedirqs; mutex_lock(&wl->mutex); + B43legacy_WARN_ON(wl->vif != vif); dev = wl->current_dev; phy = &dev->phy; @@ -2817,8 +2818,29 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, goto out_unlock_mutex; } savedirqs = b43legacy_interrupt_disable(dev, B43legacy_IRQ_ALL); - spin_unlock_irqrestore(&wl->irq_lock, flags); - b43legacy_synchronize_irq(dev); + + if (changed & BSS_CHANGED_BSSID) { + spin_unlock_irqrestore(&wl->irq_lock, flags); + b43legacy_synchronize_irq(dev); + + if (conf->bssid) + memcpy(wl->bssid, conf->bssid, ETH_ALEN); + else + memset(wl->bssid, 0, ETH_ALEN); + + if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) { + if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) { + B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP); + if (changed & BSS_CHANGED_BEACON) + b43legacy_update_templates(wl); + } else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) { + if (changed & BSS_CHANGED_BEACON) + b43legacy_update_templates(wl); + } + b43legacy_write_mac_bssid_templates(dev); + } + spin_unlock_irqrestore(&wl->irq_lock, flags); + } b43legacy_mac_suspend(dev); @@ -2846,8 +2868,6 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, spin_unlock_irqrestore(&wl->irq_lock, flags); out_unlock_mutex: mutex_unlock(&wl->mutex); - - return; } static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, @@ -2889,40 +2909,6 @@ static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, spin_unlock_irqrestore(&wl->irq_lock, flags); } -static int b43legacy_op_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); - struct b43legacy_wldev *dev = wl->current_dev; - unsigned long flags; - - if (!dev) - return -ENODEV; - mutex_lock(&wl->mutex); - spin_lock_irqsave(&wl->irq_lock, flags); - B43legacy_WARN_ON(wl->vif != vif); - if (conf->bssid) - memcpy(wl->bssid, conf->bssid, ETH_ALEN); - else - memset(wl->bssid, 0, ETH_ALEN); - if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) { - if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) { - B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP); - if (conf->changed & IEEE80211_IFCC_BEACON) - b43legacy_update_templates(wl); - } else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) { - if (conf->changed & IEEE80211_IFCC_BEACON) - b43legacy_update_templates(wl); - } - b43legacy_write_mac_bssid_templates(dev); - } - spin_unlock_irqrestore(&wl->irq_lock, flags); - mutex_unlock(&wl->mutex); - - return 0; -} - /* Locking: wl->mutex */ static void b43legacy_wireless_core_stop(struct b43legacy_wldev *dev) { @@ -3563,7 +3549,6 @@ static const struct ieee80211_ops b43legacy_hw_ops = { .remove_interface = b43legacy_op_remove_interface, .config = b43legacy_op_dev_config, .bss_info_changed = b43legacy_op_bss_info_changed, - .config_interface = b43legacy_op_config_interface, .configure_filter = b43legacy_op_configure_filter, .get_stats = b43legacy_op_get_stats, .get_tx_stats = b43legacy_op_get_tx_stats, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 4920dcf7d7b..dd8b15ed829 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2623,7 +2623,6 @@ static struct ieee80211_ops iwl_hw_ops = { .add_interface = iwl_mac_add_interface, .remove_interface = iwl_mac_remove_interface, .config = iwl_mac_config, - .config_interface = iwl_mac_config_interface, .configure_filter = iwl_configure_filter, .set_key = iwl_mac_set_key, .update_tkip_key = iwl_mac_update_tkip_key, diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index cd8a5ed97c4..0f21fc136ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2238,15 +2238,69 @@ static void iwl_ht_conf(struct iwl_priv *priv, #define IWL_DELAY_NEXT_SCAN_AFTER_ASSOC (HZ*6) void iwl_bss_info_changed(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *bss_conf, - u32 changes) + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *bss_conf, + u32 changes) { struct iwl_priv *priv = hw->priv; int ret; IWL_DEBUG_MAC80211(priv, "changes = 0x%X\n", changes); + if (!iwl_is_alive(priv)) + return; + + mutex_lock(&priv->mutex); + + if (changes & BSS_CHANGED_BEACON && + priv->iw_mode == NL80211_IFTYPE_AP) { + dev_kfree_skb(priv->ibss_beacon); + priv->ibss_beacon = ieee80211_beacon_get(hw, vif); + } + + if ((changes & BSS_CHANGED_BSSID) && !iwl_is_rfkill(priv)) { + /* If there is currently a HW scan going on in the background + * then we need to cancel it else the RXON below will fail. */ + if (iwl_scan_cancel_timeout(priv, 100)) { + IWL_WARN(priv, "Aborted scan still in progress " + "after 100ms\n"); + IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); + mutex_unlock(&priv->mutex); + return; + } + memcpy(priv->staging_rxon.bssid_addr, + bss_conf->bssid, ETH_ALEN); + + /* TODO: Audit driver for usage of these members and see + * if mac80211 deprecates them (priv->bssid looks like it + * shouldn't be there, but I haven't scanned the IBSS code + * to verify) - jpk */ + memcpy(priv->bssid, bss_conf->bssid, ETH_ALEN); + + if (priv->iw_mode == NL80211_IFTYPE_AP) + iwlcore_config_ap(priv); + else { + int rc = iwlcore_commit_rxon(priv); + if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc) + iwl_rxon_add_station( + priv, priv->active_rxon.bssid_addr, 1); + } + } else if (!iwl_is_rfkill(priv)) { + iwl_scan_cancel_timeout(priv, 100); + priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK; + iwlcore_commit_rxon(priv); + } + + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && + changes & BSS_CHANGED_BEACON) { + struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); + + if (beacon) + iwl_mac_beacon_update(hw, beacon); + } + + mutex_unlock(&priv->mutex); + if (changes & BSS_CHANGED_ERP_PREAMBLE) { IWL_DEBUG_MAC80211(priv, "ERP_PREAMBLE %d\n", bss_conf->use_short_preamble); @@ -2305,7 +2359,7 @@ void iwl_bss_info_changed(struct ieee80211_hw *hw, &priv->staging_rxon, sizeof(struct iwl_rxon_cmd)); } - + IWL_DEBUG_MAC80211(priv, "leave\n"); } EXPORT_SYMBOL(iwl_bss_info_changed); @@ -2589,106 +2643,6 @@ out: } EXPORT_SYMBOL(iwl_mac_config); -int iwl_mac_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct iwl_priv *priv = hw->priv; - int rc; - - if (conf == NULL) - return -EIO; - - if (priv->vif != vif) { - IWL_DEBUG_MAC80211(priv, "leave - priv->vif != vif\n"); - return 0; - } - - if (priv->iw_mode == NL80211_IFTYPE_ADHOC && - conf->changed & IEEE80211_IFCC_BEACON) { - struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); - if (!beacon) - return -ENOMEM; - mutex_lock(&priv->mutex); - rc = iwl_mac_beacon_update(hw, beacon); - mutex_unlock(&priv->mutex); - if (rc) - return rc; - } - - if (!iwl_is_alive(priv)) - return -EAGAIN; - - mutex_lock(&priv->mutex); - - if (conf->bssid) - IWL_DEBUG_MAC80211(priv, "bssid: %pM\n", conf->bssid); - -/* - * very dubious code was here; the probe filtering flag is never set: - * - if (unlikely(test_bit(STATUS_SCANNING, &priv->status)) && - !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) { - */ - - if (priv->iw_mode == NL80211_IFTYPE_AP) { - if (!conf->bssid) { - conf->bssid = priv->mac_addr; - memcpy(priv->bssid, priv->mac_addr, ETH_ALEN); - IWL_DEBUG_MAC80211(priv, "bssid was set to: %pM\n", - conf->bssid); - } - if (priv->ibss_beacon) - dev_kfree_skb(priv->ibss_beacon); - - priv->ibss_beacon = ieee80211_beacon_get(hw, vif); - } - - if (iwl_is_rfkill(priv)) - goto done; - - if (conf->bssid && !is_zero_ether_addr(conf->bssid) && - !is_multicast_ether_addr(conf->bssid)) { - /* If there is currently a HW scan going on in the background - * then we need to cancel it else the RXON below will fail. */ - if (iwl_scan_cancel_timeout(priv, 100)) { - IWL_WARN(priv, "Aborted scan still in progress " - "after 100ms\n"); - IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); - mutex_unlock(&priv->mutex); - return -EAGAIN; - } - memcpy(priv->staging_rxon.bssid_addr, conf->bssid, ETH_ALEN); - - /* TODO: Audit driver for usage of these members and see - * if mac80211 deprecates them (priv->bssid looks like it - * shouldn't be there, but I haven't scanned the IBSS code - * to verify) - jpk */ - memcpy(priv->bssid, conf->bssid, ETH_ALEN); - - if (priv->iw_mode == NL80211_IFTYPE_AP) - iwlcore_config_ap(priv); - else { - rc = iwlcore_commit_rxon(priv); - if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc) - iwl_rxon_add_station( - priv, priv->active_rxon.bssid_addr, 1); - } - - } else { - iwl_scan_cancel_timeout(priv, 100); - priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK; - iwlcore_commit_rxon(priv); - } - - done: - IWL_DEBUG_MAC80211(priv, "leave\n"); - mutex_unlock(&priv->mutex); - - return 0; -} -EXPORT_SYMBOL(iwl_mac_config_interface); - int iwl_mac_get_tx_stats(struct ieee80211_hw *hw, struct ieee80211_tx_queue_stats *stats) { diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index d4c60afa289..bd7f9d9616b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -281,9 +281,6 @@ void iwl_mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); int iwl_mac_config(struct ieee80211_hw *hw, u32 changed); void iwl_config_ap(struct iwl_priv *priv); -int iwl_mac_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf); int iwl_mac_get_tx_stats(struct ieee80211_hw *hw, struct ieee80211_tx_queue_stats *stats); void iwl_mac_reset_tsf(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index f9d9b65ef30..5cd4321d7cf 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -4106,7 +4106,6 @@ static struct ieee80211_ops iwl3945_hw_ops = { .add_interface = iwl_mac_add_interface, .remove_interface = iwl_mac_remove_interface, .config = iwl_mac_config, - .config_interface = iwl_mac_config_interface, .configure_filter = iwl_configure_filter, .set_key = iwl3945_mac_set_key, .get_tx_stats = iwl_mac_get_tx_stats, diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 68e47fb47a1..10a99e26d39 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -366,36 +366,6 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed) return 0; } -static int lbtf_op_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct lbtf_private *priv = hw->priv; - struct sk_buff *beacon; - - switch (priv->vif->type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_MESH_POINT: - beacon = ieee80211_beacon_get(hw, vif); - if (beacon) { - lbtf_beacon_set(priv, beacon); - kfree_skb(beacon); - lbtf_beacon_ctrl(priv, 1, vif->bss_conf.beacon_int); - } - break; - default: - break; - } - - if (conf->bssid) { - u8 null_bssid[ETH_ALEN] = {0}; - bool activate = compare_ether_addr(conf->bssid, null_bssid); - lbtf_set_bssid(priv, activate, conf->bssid); - } - - return 0; -} - #define SUPPORTED_FIF_FLAGS (FIF_PROMISC_IN_BSS | FIF_ALLMULTI) static void lbtf_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, @@ -451,6 +421,29 @@ static void lbtf_op_bss_info_changed(struct ieee80211_hw *hw, u32 changes) { struct lbtf_private *priv = hw->priv; + struct sk_buff *beacon; + + if (changes & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_INT)) { + switch (priv->vif->type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: + beacon = ieee80211_beacon_get(hw, vif); + if (beacon) { + lbtf_beacon_set(priv, beacon); + kfree_skb(beacon); + lbtf_beacon_ctrl(priv, 1, + bss_conf->beacon_int); + } + break; + default: + break; + } + } + + if (changes & BSS_CHANGED_BSSID) { + bool activate = !is_zero_ether_addr(bss_conf->bssid); + lbtf_set_bssid(priv, activate, bss_conf->bssid); + } if (changes & BSS_CHANGED_ERP_PREAMBLE) { if (bss_conf->use_short_preamble) @@ -459,8 +452,6 @@ static void lbtf_op_bss_info_changed(struct ieee80211_hw *hw, priv->preamble = CMD_TYPE_LONG_PREAMBLE; lbtf_set_radio_control(priv); } - - return; } static const struct ieee80211_ops lbtf_ops = { @@ -470,7 +461,6 @@ static const struct ieee80211_ops lbtf_ops = { .add_interface = lbtf_op_add_interface, .remove_interface = lbtf_op_remove_interface, .config = lbtf_op_config, - .config_interface = lbtf_op_config_interface, .configure_filter = lbtf_op_configure_filter, .bss_info_changed = lbtf_op_bss_info_changed, }; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 24c95a619e4..e67b424f925 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -587,23 +587,6 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, *total_flags = data->rx_filter; } -static int mac80211_hwsim_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct hwsim_vif_priv *vp = (void *)vif->drv_priv; - - hwsim_check_magic(vif); - if (conf->changed & IEEE80211_IFCC_BSSID) { - DECLARE_MAC_BUF(mac); - printk(KERN_DEBUG "%s:%s: BSSID changed: %pM\n", - wiphy_name(hw->wiphy), __func__, - conf->bssid); - memcpy(vp->bssid, conf->bssid, ETH_ALEN); - } - return 0; -} - static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, @@ -617,6 +600,13 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, printk(KERN_DEBUG "%s:%s(changed=0x%x)\n", wiphy_name(hw->wiphy), __func__, changed); + if (changed & BSS_CHANGED_BSSID) { + printk(KERN_DEBUG "%s:%s: BSSID changed: %pM\n", + wiphy_name(hw->wiphy), __func__, + info->bssid); + memcpy(vp->bssid, info->bssid, ETH_ALEN); + } + if (changed & BSS_CHANGED_ASSOC) { printk(KERN_DEBUG " %s: ASSOC: assoc=%d aid=%d\n", wiphy_name(hw->wiphy), info->assoc, info->aid); @@ -708,7 +698,6 @@ static const struct ieee80211_ops mac80211_hwsim_ops = .remove_interface = mac80211_hwsim_remove_interface, .config = mac80211_hwsim_config, .configure_filter = mac80211_hwsim_configure_filter, - .config_interface = mac80211_hwsim_config_interface, .bss_info_changed = mac80211_hwsim_bss_info_changed, .sta_notify = mac80211_hwsim_sta_notify, .set_tim = mac80211_hwsim_set_tim, diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index a9a970469c2..46b288dc8f4 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3089,19 +3089,6 @@ static int mwl8k_config(struct ieee80211_hw *hw, u32 changed) return rc ? -EINVAL : 0; } -static int mwl8k_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct mwl8k_vif *mv_vif = MWL8K_VIF(vif); - u32 changed = conf->changed; - - if (changed & IEEE80211_IFCC_BSSID) - memcpy(mv_vif->bssid, conf->bssid, IEEE80211_ADDR_LEN); - - return 0; -} - struct mwl8k_bss_info_changed_worker { struct mwl8k_work_struct header; struct ieee80211_vif *vif; @@ -3183,8 +3170,12 @@ static void mwl8k_bss_info_changed(struct ieee80211_hw *hw, { struct mwl8k_bss_info_changed_worker *worker; struct mwl8k_priv *priv = hw->priv; + struct mwl8k_vif *mv_vif = MWL8K_VIF(vif); int rc; + if (changed & BSS_CHANGED_BSSID) + memcpy(mv_vif->bssid, info->bssid, IEEE80211_ADDR_LEN); + if ((changed & BSS_CHANGED_ASSOC) == 0) return; @@ -3442,7 +3433,6 @@ static const struct ieee80211_ops mwl8k_ops = { .add_interface = mwl8k_add_interface, .remove_interface = mwl8k_remove_interface, .config = mwl8k_config, - .config_interface = mwl8k_config_interface, .bss_info_changed = mwl8k_bss_info_changed, .configure_filter = mwl8k_configure_filter, .set_rts_threshold = mwl8k_set_rts_threshold, diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 71394968d45..b8578529180 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -2204,41 +2204,6 @@ out: return ret; } -static int p54_config_interface(struct ieee80211_hw *dev, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct p54_common *priv = dev->priv; - int ret = 0; - - mutex_lock(&priv->conf_mutex); - if (conf->changed & IEEE80211_IFCC_BSSID) { - memcpy(priv->bssid, conf->bssid, ETH_ALEN); - ret = p54_setup_mac(dev); - if (ret) - goto out; - } - - if (conf->changed & IEEE80211_IFCC_BEACON) { - ret = p54_scan(dev, P54_SCAN_EXIT, 0); - if (ret) - goto out; - ret = p54_setup_mac(dev); - if (ret) - goto out; - ret = p54_beacon_update(dev, vif); - if (ret) - goto out; - ret = p54_set_edcf(dev); - if (ret) - goto out; - } - -out: - mutex_unlock(&priv->conf_mutex); - return ret; -} - static void p54_configure_filter(struct ieee80211_hw *dev, unsigned int changed_flags, unsigned int *total_flags, @@ -2342,8 +2307,32 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev, u32 changed) { struct p54_common *priv = dev->priv; + int ret; + + mutex_lock(&priv->conf_mutex); + if (changed & BSS_CHANGED_BSSID) { + memcpy(priv->bssid, info->bssid, ETH_ALEN); + ret = p54_setup_mac(dev); + if (ret) + goto out; + } + + if (changed & BSS_CHANGED_BEACON) { + ret = p54_scan(dev, P54_SCAN_EXIT, 0); + if (ret) + goto out; + ret = p54_setup_mac(dev); + if (ret) + goto out; + ret = p54_beacon_update(dev, vif); + if (ret) + goto out; + } + /* XXX: this mimics having two callbacks... clean up */ + out: + mutex_unlock(&priv->conf_mutex); - if (changed & BSS_CHANGED_ERP_SLOT) { + if (changed & (BSS_CHANGED_ERP_SLOT | BSS_CHANGED_BEACON)) { priv->use_short_slot = info->use_short_slot; p54_set_edcf(dev); } @@ -2364,7 +2353,6 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev, p54_setup_mac(dev); } } - } static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, @@ -2619,7 +2607,6 @@ static const struct ieee80211_ops p54_ops = { .sta_notify = p54_sta_notify, .set_key = p54_set_key, .config = p54_config, - .config_interface = p54_config_interface, .bss_info_changed = p54_bss_info_changed, .configure_filter = p54_configure_filter, .conf_tx = p54_conf_tx, diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index 411eb9cbb4e..6f39ba66218 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -1580,7 +1580,6 @@ static const struct ieee80211_ops rt2400pci_mac80211_ops = { .add_interface = rt2x00mac_add_interface, .remove_interface = rt2x00mac_remove_interface, .config = rt2x00mac_config, - .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .get_stats = rt2x00mac_get_stats, .bss_info_changed = rt2x00mac_bss_info_changed, diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index e1be67ca23d..906960f67b6 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -1879,7 +1879,6 @@ static const struct ieee80211_ops rt2500pci_mac80211_ops = { .add_interface = rt2x00mac_add_interface, .remove_interface = rt2x00mac_remove_interface, .config = rt2x00mac_config, - .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .get_stats = rt2x00mac_get_stats, .bss_info_changed = rt2x00mac_bss_info_changed, diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 9e630e70fc9..08e88333b0f 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -1908,7 +1908,6 @@ static const struct ieee80211_ops rt2500usb_mac80211_ops = { .add_interface = rt2x00mac_add_interface, .remove_interface = rt2x00mac_remove_interface, .config = rt2x00mac_config, - .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .set_key = rt2x00mac_set_key, .get_stats = rt2x00mac_get_stats, diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index e03d69975ea..0be39552826 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -943,9 +943,6 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, void rt2x00mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed); -int rt2x00mac_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf); void rt2x00mac_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index c41a0b9e473..c4c06b4e1f0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -390,56 +390,6 @@ int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed) } EXPORT_SYMBOL_GPL(rt2x00mac_config); -int rt2x00mac_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct rt2x00_dev *rt2x00dev = hw->priv; - struct rt2x00_intf *intf = vif_to_intf(vif); - int update_bssid = 0; - int status = 0; - - /* - * Mac80211 might be calling this function while we are trying - * to remove the device or perhaps suspending it. - */ - if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags)) - return 0; - - spin_lock(&intf->lock); - - /* - * conf->bssid can be NULL if coming from the internal - * beacon update routine. - */ - if (conf->changed & IEEE80211_IFCC_BSSID && conf->bssid) { - update_bssid = 1; - memcpy(&intf->bssid, conf->bssid, ETH_ALEN); - } - - spin_unlock(&intf->lock); - - /* - * Call rt2x00_config_intf() outside of the spinlock context since - * the call will sleep for USB drivers. By using the ieee80211_if_conf - * values as arguments we make keep access to rt2x00_intf thread safe - * even without the lock. - */ - rt2x00lib_config_intf(rt2x00dev, intf, vif->type, NULL, - update_bssid ? conf->bssid : NULL); - - /* - * Update the beacon. - */ - if (conf->changed & (IEEE80211_IFCC_BEACON | - IEEE80211_IFCC_BEACON_ENABLED)) - status = rt2x00queue_update_beacon(rt2x00dev, vif, - conf->enable_beacon); - - return status; -} -EXPORT_SYMBOL_GPL(rt2x00mac_config_interface); - void rt2x00mac_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, @@ -623,6 +573,44 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, struct rt2x00_dev *rt2x00dev = hw->priv; struct rt2x00_intf *intf = vif_to_intf(vif); unsigned int delayed = 0; + int update_bssid = 0; + + /* + * Mac80211 might be calling this function while we are trying + * to remove the device or perhaps suspending it. + */ + if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags)) + return; + + spin_lock(&intf->lock); + + /* + * conf->bssid can be NULL if coming from the internal + * beacon update routine. + */ + if (changes & BSS_CHANGED_BSSID) { + update_bssid = 1; + memcpy(&intf->bssid, bss_conf->bssid, ETH_ALEN); + } + + spin_unlock(&intf->lock); + + /* + * Call rt2x00_config_intf() outside of the spinlock context since + * the call will sleep for USB drivers. By using the ieee80211_if_conf + * values as arguments we make keep access to rt2x00_intf thread safe + * even without the lock. + */ + if (changes & BSS_CHANGED_BSSID) + rt2x00lib_config_intf(rt2x00dev, intf, vif->type, NULL, + update_bssid ? bss_conf->bssid : NULL); + + /* + * Update the beacon. + */ + if (changes & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED)) + rt2x00queue_update_beacon(rt2x00dev, vif, + bss_conf->enable_beacon); /* * When the association status has changed we must reset the link diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 4346cd1494b..cb521ee7a8f 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2735,7 +2735,6 @@ static const struct ieee80211_ops rt61pci_mac80211_ops = { .add_interface = rt2x00mac_add_interface, .remove_interface = rt2x00mac_remove_interface, .config = rt2x00mac_config, - .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .set_key = rt2x00mac_set_key, .get_stats = rt2x00mac_get_stats, diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 853b2b279b6..6bd28a6bcef 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2259,7 +2259,6 @@ static const struct ieee80211_ops rt73usb_mac80211_ops = { .add_interface = rt2x00mac_add_interface, .remove_interface = rt2x00mac_remove_interface, .config = rt2x00mac_config, - .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .set_key = rt2x00mac_set_key, .get_stats = rt2x00mac_get_stats, diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index 387c133ec0f..7e65d7c3180 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -702,30 +702,26 @@ static int rtl8180_config(struct ieee80211_hw *dev, u32 changed) return 0; } -static int rtl8180_config_interface(struct ieee80211_hw *dev, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct rtl8180_priv *priv = dev->priv; - int i; - - for (i = 0; i < ETH_ALEN; i++) - rtl818x_iowrite8(priv, &priv->map->BSSID[i], conf->bssid[i]); - - if (is_valid_ether_addr(conf->bssid)) - rtl818x_iowrite8(priv, &priv->map->MSR, RTL818X_MSR_INFRA); - else - rtl818x_iowrite8(priv, &priv->map->MSR, RTL818X_MSR_NO_LINK); - - return 0; -} - static void rtl8180_bss_info_changed(struct ieee80211_hw *dev, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, u32 changed) { struct rtl8180_priv *priv = dev->priv; + int i; + + if (changed & BSS_CHANGED_BSSID) { + for (i = 0; i < ETH_ALEN; i++) + rtl818x_iowrite8(priv, &priv->map->BSSID[i], + info->bssid[i]); + + if (is_valid_ether_addr(info->bssid)) + rtl818x_iowrite8(priv, &priv->map->MSR, + RTL818X_MSR_INFRA); + else + rtl818x_iowrite8(priv, &priv->map->MSR, + RTL818X_MSR_NO_LINK); + } if (changed & BSS_CHANGED_ERP_SLOT && priv->rf->conf_erp) priv->rf->conf_erp(dev, info); @@ -770,7 +766,6 @@ static const struct ieee80211_ops rtl8180_ops = { .add_interface = rtl8180_add_interface, .remove_interface = rtl8180_remove_interface, .config = rtl8180_config, - .config_interface = rtl8180_config_interface, .bss_info_changed = rtl8180_bss_info_changed, .configure_filter = rtl8180_configure_filter, }; diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index ac558da92aa..158827e50c5 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -1090,32 +1090,6 @@ static int rtl8187_config(struct ieee80211_hw *dev, u32 changed) return 0; } -static int rtl8187_config_interface(struct ieee80211_hw *dev, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct rtl8187_priv *priv = dev->priv; - int i; - u8 reg; - - mutex_lock(&priv->conf_mutex); - for (i = 0; i < ETH_ALEN; i++) - rtl818x_iowrite8(priv, &priv->map->BSSID[i], conf->bssid[i]); - - if (is_valid_ether_addr(conf->bssid)) { - reg = RTL818X_MSR_INFRA; - if (priv->is_rtl8187b) - reg |= RTL818X_MSR_ENEDCA; - rtl818x_iowrite8(priv, &priv->map->MSR, reg); - } else { - reg = RTL818X_MSR_NO_LINK; - rtl818x_iowrite8(priv, &priv->map->MSR, reg); - } - - mutex_unlock(&priv->conf_mutex); - return 0; -} - /* * With 8187B, AC_*_PARAM clashes with FEMR definition in struct rtl818x_csr for * example. Thus we have to use raw values for AC_*_PARAM register addresses. @@ -1193,6 +1167,27 @@ static void rtl8187_bss_info_changed(struct ieee80211_hw *dev, u32 changed) { struct rtl8187_priv *priv = dev->priv; + int i; + u8 reg; + + if (changed & BSS_CHANGED_BSSID) { + mutex_lock(&priv->conf_mutex); + for (i = 0; i < ETH_ALEN; i++) + rtl818x_iowrite8(priv, &priv->map->BSSID[i], + info->bssid[i]); + + if (is_valid_ether_addr(info->bssid)) { + reg = RTL818X_MSR_INFRA; + if (priv->is_rtl8187b) + reg |= RTL818X_MSR_ENEDCA; + rtl818x_iowrite8(priv, &priv->map->MSR, reg); + } else { + reg = RTL818X_MSR_NO_LINK; + rtl818x_iowrite8(priv, &priv->map->MSR, reg); + } + + mutex_unlock(&priv->conf_mutex); + } if (changed & (BSS_CHANGED_ERP_SLOT | BSS_CHANGED_ERP_PREAMBLE)) rtl8187_conf_erp(priv, info->use_short_slot, @@ -1274,7 +1269,6 @@ static const struct ieee80211_ops rtl8187_ops = { .add_interface = rtl8187_add_interface, .remove_interface = rtl8187_remove_interface, .config = rtl8187_config, - .config_interface = rtl8187_config_interface, .bss_info_changed = rtl8187_bss_info_changed, .configure_filter = rtl8187_configure_filter, .conf_tx = rtl8187_conf_tx diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index c3a51266de2..6bdb1704083 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -755,52 +755,6 @@ static int zd_op_config(struct ieee80211_hw *hw, u32 changed) return zd_chip_set_channel(&mac->chip, conf->channel->hw_value); } -static int zd_op_config_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf) -{ - struct zd_mac *mac = zd_hw_mac(hw); - int associated; - int r; - - if (mac->type == NL80211_IFTYPE_MESH_POINT || - mac->type == NL80211_IFTYPE_ADHOC) { - associated = true; - if (conf->changed & IEEE80211_IFCC_BEACON) { - struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); - - if (!beacon) - return -ENOMEM; - r = zd_mac_config_beacon(hw, beacon); - kfree_skb(beacon); - - if (r < 0) - return r; - } - - if (conf->changed & IEEE80211_IFCC_BEACON_ENABLED) { - u32 interval; - - if (conf->enable_beacon) - interval = BCN_MODE_IBSS | hw->conf.beacon_int; - else - interval = 0; - - r = zd_set_beacon_interval(&mac->chip, interval); - if (r < 0) - return r; - } - } else - associated = is_valid_ether_addr(conf->bssid); - - spin_lock_irq(&mac->lock); - mac->associated = associated; - spin_unlock_irq(&mac->lock); - - /* TODO: do hardware bssid filtering */ - return 0; -} - static void zd_process_intr(struct work_struct *work) { u16 int_status; @@ -923,9 +877,42 @@ static void zd_op_bss_info_changed(struct ieee80211_hw *hw, { struct zd_mac *mac = zd_hw_mac(hw); unsigned long flags; + int associated; dev_dbg_f(zd_mac_dev(mac), "changes: %x\n", changes); + if (mac->type == NL80211_IFTYPE_MESH_POINT || + mac->type == NL80211_IFTYPE_ADHOC) { + associated = true; + if (changes & BSS_CHANGED_BEACON) { + struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); + + if (beacon) { + zd_mac_config_beacon(hw, beacon); + kfree_skb(beacon); + } + } + + if (changes & BSS_CHANGED_BEACON_ENABLED) { + u32 interval; + + if (bss_conf->enable_beacon) + interval = BCN_MODE_IBSS | + bss_conf->beacon_int; + else + interval = 0; + + zd_set_beacon_interval(&mac->chip, interval); + } + } else + associated = is_valid_ether_addr(bss_conf->bssid); + + spin_lock_irq(&mac->lock); + mac->associated = associated; + spin_unlock_irq(&mac->lock); + + /* TODO: do hardware bssid filtering */ + if (changes & BSS_CHANGED_ERP_PREAMBLE) { spin_lock_irqsave(&mac->lock, flags); mac->short_preamble = bss_conf->use_short_preamble; @@ -952,7 +939,6 @@ static const struct ieee80211_ops zd_ops = { .add_interface = zd_op_add_interface, .remove_interface = zd_op_remove_interface, .config = zd_op_config, - .config_interface = zd_op_config_interface, .configure_filter = zd_op_configure_filter, .bss_info_changed = zd_op_bss_info_changed, .get_tsf = zd_op_get_tsf, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 22c65e8cbb7..7806e22f4ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -150,6 +150,12 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_HT: 802.11n parameters changed * @BSS_CHANGED_BASIC_RATES: Basic rateset changed * @BSS_CHANGED_BEACON_INT: Beacon interval changed + * @BSS_CHANGED_BSSID: BSSID changed, for whatever + * reason (IBSS and managed mode) + * @BSS_CHANGED_BEACON: Beacon data changed, retrieve + * new beacon (beaconing modes) + * @BSS_CHANGED_BEACON_ENABLED: Beaconing should be + * enabled/disabled (beaconing modes) */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -159,6 +165,9 @@ enum ieee80211_bss_change { BSS_CHANGED_HT = 1<<4, BSS_CHANGED_BASIC_RATES = 1<<5, BSS_CHANGED_BEACON_INT = 1<<6, + BSS_CHANGED_BSSID = 1<<7, + BSS_CHANGED_BEACON = 1<<8, + BSS_CHANGED_BEACON_ENABLED = 1<<9, }; /** @@ -192,8 +201,11 @@ struct ieee80211_bss_ht_conf { * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. + * @bssid: The BSSID for this BSS + * @enable_beacon: whether beaconing should be enabled or not */ struct ieee80211_bss_conf { + const u8 *bssid; /* association related data */ bool assoc; u16 aid; @@ -201,6 +213,7 @@ struct ieee80211_bss_conf { bool use_cts_prot; bool use_short_preamble; bool use_short_slot; + bool enable_beacon; u8 dtim_period; u16 beacon_int; u16 assoc_capability; @@ -659,37 +672,6 @@ struct ieee80211_if_init_conf { void *mac_addr; }; -/** - * enum ieee80211_if_conf_change - interface config change flags - * - * @IEEE80211_IFCC_BSSID: The BSSID changed. - * @IEEE80211_IFCC_BEACON: The beacon for this interface changed - * (currently AP and MESH only), use ieee80211_beacon_get(). - * @IEEE80211_IFCC_BEACON_ENABLED: The enable_beacon value changed. - */ -enum ieee80211_if_conf_change { - IEEE80211_IFCC_BSSID = BIT(0), - IEEE80211_IFCC_BEACON = BIT(1), - IEEE80211_IFCC_BEACON_ENABLED = BIT(2), -}; - -/** - * struct ieee80211_if_conf - configuration of an interface - * - * @changed: parameters that have changed, see &enum ieee80211_if_conf_change. - * @bssid: BSSID of the network we are associated to/creating. - * @enable_beacon: Indicates whether beacons can be sent. - * This is valid only for AP/IBSS/MESH modes. - * - * This structure is passed to the config_interface() callback of - * &struct ieee80211_hw. - */ -struct ieee80211_if_conf { - u32 changed; - const u8 *bssid; - bool enable_beacon; -}; - /** * enum ieee80211_key_alg - key algorithm * @ALG_WEP: WEP40 or WEP104 @@ -1358,10 +1340,6 @@ enum ieee80211_ampdu_mlme_action { * This function should never fail but returns a negative error code * if it does. * - * @config_interface: Handler for configuration requests related to interfaces - * (e.g. BSSID changes.) - * Returns a negative error code which will be seen in userspace. - * * @bss_info_changed: Handler for configuration requests related to BSS * parameters that may vary during BSS's lifespan, and may affect low * level driver (e.g. assoc/disassoc status, erp parameters). @@ -1463,9 +1441,6 @@ struct ieee80211_ops { void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); int (*config)(struct ieee80211_hw *hw, u32 changed); - int (*config_interface)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_if_conf *conf); void (*bss_info_changed)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a898ccd3f2c..648bac1c850 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -521,8 +521,9 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); - return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | - IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_BEACON); + return 0; } static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, @@ -573,7 +574,8 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) synchronize_rcu(); kfree(old); - return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + return 0; } /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f4879dad3cd..c87caad383f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -95,17 +95,10 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); - local->oper_channel = chan; local->oper_channel_type = NL80211_CHAN_NO_HT; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - sdata->vif.bss_conf.beacon_int = beacon_int; - bss_change = BSS_CHANGED_BEACON_INT; - bss_change |= ieee80211_reset_erp_info(sdata); - ieee80211_bss_info_change_notify(sdata, bss_change); - sband = local->hw.wiphy->bands[chan->band]; /* Build IBSS probe response */ @@ -161,8 +154,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | - IEEE80211_IFCC_BEACON_ENABLED); + sdata->vif.bss_conf.beacon_int = beacon_int; + bss_change = BSS_CHANGED_BEACON_INT; + bss_change |= ieee80211_reset_erp_info(sdata); + bss_change |= BSS_CHANGED_BSSID; + bss_change |= BSS_CHANGED_BEACON; + bss_change |= BSS_CHANGED_BEACON_ENABLED; + ieee80211_bss_info_change_notify(sdata, bss_change); rates = 0; for (i = 0; i < supp_rates_len; i++) { @@ -887,7 +885,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = sdata->u.ibss.presp; rcu_assign_pointer(sdata->u.ibss.presp, NULL); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); synchronize_rcu(); kfree_skb(skb); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e6fd4bcf1c3..d8de1e159ee 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -906,7 +906,6 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); -int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b254879d863..c817c9ef215 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -152,82 +152,6 @@ static void ieee80211_master_set_multicast_list(struct net_device *dev) ieee80211_configure_filter(local); } -/* everything else */ - -int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_conf conf; - - if (WARN_ON(!netif_running(sdata->dev))) - return 0; - - memset(&conf, 0, sizeof(conf)); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) - conf.bssid = sdata->u.mgd.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - conf.bssid = sdata->u.ibss.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_AP) - conf.bssid = sdata->dev->dev_addr; - else if (ieee80211_vif_is_mesh(&sdata->vif)) { - static const u8 zero[ETH_ALEN] = { 0 }; - conf.bssid = zero; - } else { - WARN_ON(1); - return -EINVAL; - } - - if (!local->ops->config_interface) - return 0; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - break; - default: - /* do not warn to simplify caller in scan.c */ - changed &= ~IEEE80211_IFCC_BEACON_ENABLED; - if (WARN_ON(changed & IEEE80211_IFCC_BEACON)) - return -EINVAL; - changed &= ~IEEE80211_IFCC_BEACON; - break; - } - - if (changed & IEEE80211_IFCC_BEACON_ENABLED) { - if (local->sw_scanning) { - conf.enable_beacon = false; - } else { - /* - * Beacon should be enabled, but AP mode must - * check whether there is a beacon configured. - */ - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - conf.enable_beacon = - !!rcu_dereference(sdata->u.ap.beacon); - break; - case NL80211_IFTYPE_ADHOC: - conf.enable_beacon = !!sdata->u.ibss.presp; - break; - case NL80211_IFTYPE_MESH_POINT: - conf.enable_beacon = true; - break; - default: - /* not reached */ - WARN_ON(1); - break; - } - } - } - - conf.changed = changed; - - return local->ops->config_interface(local_to_hw(local), - &sdata->vif, &conf); -} - int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan; @@ -297,6 +221,61 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (!changed) return; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; + else if (sdata->vif.type == NL80211_IFTYPE_AP) + sdata->vif.bss_conf.bssid = sdata->dev->dev_addr; + else if (ieee80211_vif_is_mesh(&sdata->vif)) { + static const u8 zero[ETH_ALEN] = { 0 }; + sdata->vif.bss_conf.bssid = zero; + } else { + WARN_ON(1); + return; + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + break; + default: + /* do not warn to simplify caller in scan.c */ + changed &= ~BSS_CHANGED_BEACON_ENABLED; + if (WARN_ON(changed & BSS_CHANGED_BEACON)) + return; + break; + } + + if (changed & BSS_CHANGED_BEACON_ENABLED) { + if (local->sw_scanning) { + sdata->vif.bss_conf.enable_beacon = false; + } else { + /* + * Beacon should be enabled, but AP mode must + * check whether there is a beacon configured. + */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + sdata->vif.bss_conf.enable_beacon = + !!rcu_dereference(sdata->u.ap.beacon); + break; + case NL80211_IFTYPE_ADHOC: + sdata->vif.bss_conf.enable_beacon = + !!rcu_dereference(sdata->u.ibss.presp); + break; + case NL80211_IFTYPE_MESH_POINT: + sdata->vif.bss_conf.enable_beacon = true; + break; + default: + /* not reached */ + WARN_ON(1); + break; + } + } + } + if (local->ops->bss_info_changed) local->ops->bss_info_changed(local_to_hw(local), &sdata->vif, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9a3e5de0410..9000b01a167 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -417,7 +417,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, free_plinks = mesh_plink_availables(sdata); if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); ifmsh->housekeeping = false; mod_timer(&ifmsh->housekeeping_timer, @@ -432,8 +432,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ifmsh->housekeeping = true; queue_work(local->hw.workqueue, &ifmsh->work); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | - IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED); } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bfd571e6f22..c7971196d9d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2289,12 +2289,8 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) ifmgd->flags &= ~IEEE80211_STA_BSSID_SET; } - if (netif_running(sdata->dev)) { - if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) { - printk(KERN_DEBUG "%s: Failed to config new BSSID to " - "the low-level driver\n", sdata->dev->name); - } - } + if (netif_running(sdata->dev)) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); return ieee80211_sta_commit(sdata); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 086d21645cb..04e270abdd2 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -346,8 +346,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_if_config(sdata, - IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify( + sdata, BSS_CHANGED_BEACON_ENABLED); } mutex_unlock(&local->iflist_mtx); @@ -387,8 +387,8 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_if_config(sdata, - IEEE80211_IFCC_BEACON_ENABLED); + ieee80211_bss_info_change_notify( + sdata, BSS_CHANGED_BEACON_ENABLED); if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 61876eb50b4..2cde9bbfe7d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1063,24 +1063,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: /* disable beacon change bits */ - changed &= ~IEEE80211_IFCC_BEACON; + changed &= ~(BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED); /* fall through */ case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - /* - * Driver's config_interface can fail if rfkill is - * enabled. Accommodate this return code. - * FIXME: When mac80211 has knowledge of rfkill - * state the code below can change back to: - * WARN(ieee80211_if_config(sdata, changed)); - * ieee80211_bss_info_change_notify(sdata, ~0); - */ - if (ieee80211_if_config(sdata, changed)) - printk(KERN_DEBUG "%s: failed to configure interface during resume\n", - sdata->dev->name); - else - ieee80211_bss_info_change_notify(sdata, ~0); + ieee80211_bss_info_change_notify(sdata, changed); break; case NL80211_IFTYPE_WDS: break; -- cgit v1.2.3-70-g09d2 From 5cff20e6c5a6591a79d3b027af222870f52bb550 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Apr 2009 12:26:17 +0200 Subject: mac80211: tell driver when idle When we aren't doing anything in mac80211, we can turn off much of the hardware, depending on the driver/hw. Not doing anything, aka being idle, means: * no monitor interfaces * no AP/mesh/wds interfaces * any station interfaces are in DISABLED state * any IBSS interfaces aren't trying to be in a network * we aren't trying to scan By creating a new function that verifies these conditions and calling it at strategic points where the states of those conditions change, we can easily make mac80211 tell the driver when we are idle to save power. Additionally, this fixes a small quirk where a recalculated powersave state is passed to the driver even if the hardware is about to stopped completely. This patch intentionally doesn't touch radio_enabled because that is currently implemented to be a soft rfkill which is inappropriate here when we need to be able to wake up with low latency. One thing I'm not entirely sure about is this: phy0: device no longer idle - in use wlan0: direct probe to AP 00:11:24:91:07:4d try 1 wlan0 direct probe responded wlan0: authenticate with AP 00:11:24:91:07:4d wlan0: authenticated > phy0: device now idle > phy0: device no longer idle - in use wlan0: associate with AP 00:11:24:91:07:4d wlan0: RX AssocResp from 00:11:24:91:07:4d (capab=0x401 status=0 aid=1) wlan0: associated Is it appropriate to go into idle state for a short time when we have just authenticated, but not associated yet? This happens only with the userspace SME, because we cannot really know how long it will wait before asking us to associate. Would going idle after a short timeout be more appropriate? We may need to revisit this, depending on what happens. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 6 ++- include/net/mac80211.h | 8 ++++ net/mac80211/ibss.c | 5 +++ net/mac80211/ieee80211_i.h | 2 + net/mac80211/iface.c | 78 ++++++++++++++++++++++++++++++++++- net/mac80211/mlme.c | 11 +++++ net/mac80211/scan.c | 17 ++++---- 7 files changed, 113 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8fa6e6ce570..b1213b6a6b9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -553,9 +553,11 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) struct mac80211_hwsim_data *data = hw->priv; struct ieee80211_conf *conf = &hw->conf; - printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d)\n", + printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d idle=%d ps=%d)\n", wiphy_name(hw->wiphy), __func__, - conf->channel->center_freq, conf->radio_enabled); + conf->channel->center_freq, conf->radio_enabled, + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS)); data->channel = conf->channel; data->radio_enabled = conf->radio_enabled; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7806e22f4ac..38dc1cd1027 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -533,10 +533,16 @@ struct ieee80211_rx_status { * * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only) + * @IEEE80211_CONF_IDLE: The device is running, but idle; if the flag is set + * the driver should be prepared to handle configuration requests but + * may turn the device off as much as possible. Typically, this flag will + * be set when an interface is set UP but not associated or scanning, but + * it can also be unset in that case when monitor interfaces are active. */ enum ieee80211_conf_flags { IEEE80211_CONF_RADIOTAP = (1<<0), IEEE80211_CONF_PS = (1<<1), + IEEE80211_CONF_IDLE = (1<<2), }; @@ -551,6 +557,7 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed + * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), @@ -561,6 +568,7 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), + IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; static inline __deprecated enum ieee80211_conf_changed diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a8e23232267..aa537681f87 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -862,6 +862,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ssid_len = params->ssid_len; + ieee80211_recalc_idle(sdata->local); + set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); queue_work(sdata->local->hw.workqueue, &sdata->u.ibss.work); @@ -889,6 +891,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) skb_queue_purge(&sdata->u.ibss.skb_queue); memset(sdata->u.ibss.bssid, 0, ETH_ALEN); + sdata->u.ibss.ssid_len = 0; + + ieee80211_recalc_idle(sdata->local); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 236ea098bb6..03e0d22603c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -985,6 +985,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 __ieee80211_recalc_idle(struct ieee80211_local *local); +void ieee80211_recalc_idle(struct ieee80211_local *local); /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 256fa19e14e..8b6daf0219f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -301,6 +301,8 @@ static int ieee80211_open(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); + hw_reconf_flags |= __ieee80211_recalc_idle(local); + local->open_count++; if (hw_reconf_flags) { ieee80211_hw_config(local, hw_reconf_flags); @@ -548,6 +550,10 @@ static int ieee80211_stop(struct net_device *dev) sdata->bss = NULL; + hw_reconf_flags |= __ieee80211_recalc_idle(local); + + ieee80211_recalc_ps(local, -1); + if (local->open_count == 0) { if (netif_running(local->mdev)) dev_close(local->mdev); @@ -565,8 +571,6 @@ static int ieee80211_stop(struct net_device *dev) hw_reconf_flags = 0; } - ieee80211_recalc_ps(local, -1); - /* do after stop to avoid reconfiguring when we stop anyway */ if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); @@ -892,3 +896,73 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) unregister_netdevice(sdata->dev); } } + +static u32 ieee80211_idle_off(struct ieee80211_local *local, + const char *reason) +{ + if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) + return 0; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: device no longer idle - %s\n", + wiphy_name(local->hw.wiphy), reason); +#endif + + local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; + return IEEE80211_CONF_CHANGE_IDLE; +} + +static u32 ieee80211_idle_on(struct ieee80211_local *local) +{ + if (local->hw.conf.flags & IEEE80211_CONF_IDLE) + return 0; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: device now idle\n", + wiphy_name(local->hw.wiphy)); +#endif + + local->hw.conf.flags |= IEEE80211_CONF_IDLE; + return IEEE80211_CONF_CHANGE_IDLE; +} + +u32 __ieee80211_recalc_idle(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + int count = 0; + + if (local->hw_scanning || local->sw_scanning) + return ieee80211_idle_off(local, "scanning"); + + list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + /* do not count disabled managed interfaces */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.state == IEEE80211_STA_MLME_DISABLED) + continue; + /* do not count unused IBSS interfaces */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + !sdata->u.ibss.ssid_len) + continue; + /* count everything else */ + count++; + } + + if (!count) + return ieee80211_idle_on(local); + else + return ieee80211_idle_off(local, "in use"); + + return 0; +} + +void ieee80211_recalc_idle(struct ieee80211_local *local) +{ + u32 chg; + + mutex_lock(&local->iflist_mtx); + chg = __ieee80211_recalc_idle(local); + mutex_unlock(&local->iflist_mtx); + ieee80211_hw_config(local, chg); +} diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2ded4766d01..5509c5aa6be 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -890,6 +890,7 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid); /* @@ -938,6 +939,7 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) " timed out\n", sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, @@ -1041,6 +1043,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); + ieee80211_recalc_idle(local); + /* channel(_type) changes are handled by ieee80211_hw_config */ local->oper_channel_type = NL80211_CHAN_NO_HT; @@ -1121,6 +1125,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) " timed out\n", sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, @@ -1141,6 +1146,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: mismatch in privacy configuration and " "mixed-cell disabled - abort association\n", sdata->dev->name); ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); return; } @@ -1279,6 +1285,7 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata) if (ifmgd->flags & IEEE80211_STA_EXT_SME) { /* Wait for SME to request association */ ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(sdata->local); } else ieee80211_associate(sdata); } @@ -1515,6 +1522,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_EXT_SME) { /* Wait for SME to decide what to do next */ ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); } return; } @@ -2083,6 +2091,7 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) } else { ifmgd->assoc_scan_tries = 0; ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_recalc_idle(local); } } return -1; @@ -2126,6 +2135,8 @@ static void ieee80211_sta_work(struct work_struct *work) } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request)) return; + ieee80211_recalc_idle(local); + switch (ifmgd->state) { case IEEE80211_STA_MLME_DISABLED: break; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 127bd54e0e3..c99ef8d04d3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -302,17 +302,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) /* we only have to protect scan_req and hw/sw scan */ mutex_unlock(&local->scan_mtx); - if (was_hw_scan) { - /* - * Somebody might have requested channel change during scan - * that we won't have acted upon, try now. ieee80211_hw_config - * will set the flag based on actual changes. - */ - ieee80211_hw_config(local, 0); - goto done; - } - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (was_hw_scan) + goto done; netif_tx_lock_bh(local->mdev); netif_addr_lock(local->mdev); @@ -351,6 +343,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) mutex_unlock(&local->iflist_mtx); done: + ieee80211_recalc_idle(local); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); @@ -471,6 +464,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, * dependency, so that the scan completed calls * have more locking freedom. */ + + ieee80211_recalc_idle(local); mutex_unlock(&local->scan_mtx); if (local->ops->hw_scan) @@ -487,6 +482,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, } else local->sw_scanning = false; + ieee80211_recalc_idle(local); + local->scan_req = NULL; local->scan_sdata = NULL; } -- cgit v1.2.3-70-g09d2 From 2f01a1f58889fbfeb68b1bc1b52e4197f3333490 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 29 Apr 2009 23:33:31 +0300 Subject: wl12xx: add driver wl12xx is a driver for TI wl1251 802.11 chipset designed for embedded devices, supporting both SDIO and SPI busses. Currently the driver supports only SPI. Adding support 1253 (the 5 GHz version) should be relatively easy. More information here: http://focus.ti.com/general/docs/wtbu/wtbuproductcontent.tsp?contentId=4711&navigationId=12494&templateId=6123 (Collapsed original sequence of pre-merge patches into single commit for initial merge. -- JWL) Signed-off-by: Kalle Valo Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 1 + drivers/net/wireless/Makefile | 2 + drivers/net/wireless/wl12xx/Kconfig | 11 + drivers/net/wireless/wl12xx/Makefile | 4 + drivers/net/wireless/wl12xx/acx.c | 689 ++++++++++++++ drivers/net/wireless/wl12xx/acx.h | 1245 +++++++++++++++++++++++++ drivers/net/wireless/wl12xx/boot.c | 295 ++++++ drivers/net/wireless/wl12xx/boot.h | 40 + drivers/net/wireless/wl12xx/cmd.c | 353 ++++++++ drivers/net/wireless/wl12xx/cmd.h | 265 ++++++ drivers/net/wireless/wl12xx/debugfs.c | 508 +++++++++++ drivers/net/wireless/wl12xx/debugfs.h | 33 + drivers/net/wireless/wl12xx/event.c | 127 +++ drivers/net/wireless/wl12xx/event.h | 121 +++ drivers/net/wireless/wl12xx/init.c | 200 ++++ drivers/net/wireless/wl12xx/init.h | 40 + drivers/net/wireless/wl12xx/main.c | 1358 ++++++++++++++++++++++++++++ drivers/net/wireless/wl12xx/ps.c | 151 ++++ drivers/net/wireless/wl12xx/ps.h | 36 + drivers/net/wireless/wl12xx/reg.h | 745 +++++++++++++++ drivers/net/wireless/wl12xx/rx.c | 208 +++++ drivers/net/wireless/wl12xx/rx.h | 122 +++ drivers/net/wireless/wl12xx/spi.c | 358 ++++++++ drivers/net/wireless/wl12xx/spi.h | 109 +++ drivers/net/wireless/wl12xx/tx.c | 557 ++++++++++++ drivers/net/wireless/wl12xx/tx.h | 215 +++++ drivers/net/wireless/wl12xx/wl1251.c | 709 +++++++++++++++ drivers/net/wireless/wl12xx/wl1251.h | 165 ++++ drivers/net/wireless/wl12xx/wl12xx.h | 409 +++++++++ drivers/net/wireless/wl12xx/wl12xx_80211.h | 156 ++++ include/linux/spi/wl12xx.h | 31 + 31 files changed, 9263 insertions(+) create mode 100644 drivers/net/wireless/wl12xx/Kconfig create mode 100644 drivers/net/wireless/wl12xx/Makefile create mode 100644 drivers/net/wireless/wl12xx/acx.c create mode 100644 drivers/net/wireless/wl12xx/acx.h create mode 100644 drivers/net/wireless/wl12xx/boot.c create mode 100644 drivers/net/wireless/wl12xx/boot.h create mode 100644 drivers/net/wireless/wl12xx/cmd.c create mode 100644 drivers/net/wireless/wl12xx/cmd.h create mode 100644 drivers/net/wireless/wl12xx/debugfs.c create mode 100644 drivers/net/wireless/wl12xx/debugfs.h create mode 100644 drivers/net/wireless/wl12xx/event.c create mode 100644 drivers/net/wireless/wl12xx/event.h create mode 100644 drivers/net/wireless/wl12xx/init.c create mode 100644 drivers/net/wireless/wl12xx/init.h create mode 100644 drivers/net/wireless/wl12xx/main.c create mode 100644 drivers/net/wireless/wl12xx/ps.c create mode 100644 drivers/net/wireless/wl12xx/ps.h create mode 100644 drivers/net/wireless/wl12xx/reg.h create mode 100644 drivers/net/wireless/wl12xx/rx.c create mode 100644 drivers/net/wireless/wl12xx/rx.h create mode 100644 drivers/net/wireless/wl12xx/spi.c create mode 100644 drivers/net/wireless/wl12xx/spi.h create mode 100644 drivers/net/wireless/wl12xx/tx.c create mode 100644 drivers/net/wireless/wl12xx/tx.h create mode 100644 drivers/net/wireless/wl12xx/wl1251.c create mode 100644 drivers/net/wireless/wl12xx/wl1251.h create mode 100644 drivers/net/wireless/wl12xx/wl12xx.h create mode 100644 drivers/net/wireless/wl12xx/wl12xx_80211.h create mode 100644 include/linux/spi/wl12xx.h (limited to 'include') diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 2d8434f409b..91be3e7bf13 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -500,5 +500,6 @@ source "drivers/net/wireless/b43legacy/Kconfig" source "drivers/net/wireless/zd1211rw/Kconfig" source "drivers/net/wireless/rt2x00/Kconfig" source "drivers/net/wireless/orinoco/Kconfig" +source "drivers/net/wireless/wl12xx/Kconfig" endmenu diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 0625e91b599..f2b1861e6bc 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -58,3 +58,5 @@ obj-$(CONFIG_P54_COMMON) += p54/ obj-$(CONFIG_ATH_COMMON) += ath/ obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o + +obj-$(CONFIG_WL12XX) += wl12xx/ diff --git a/drivers/net/wireless/wl12xx/Kconfig b/drivers/net/wireless/wl12xx/Kconfig new file mode 100644 index 00000000000..20a9633569f --- /dev/null +++ b/drivers/net/wireless/wl12xx/Kconfig @@ -0,0 +1,11 @@ +config WL12XX + tristate "TI wl1251/wl1271 support" + depends on MAC80211 && WLAN_80211 && SPI_MASTER && EXPERIMENTAL + select FW_LOADER + select CRC7 + ---help--- + This module adds support for wireless adapters based on + TI wl1251/wl1271 chipsets. + + If you choose to build a module, it'll be called wl12xx. Say N if + unsure. diff --git a/drivers/net/wireless/wl12xx/Makefile b/drivers/net/wireless/wl12xx/Makefile new file mode 100644 index 00000000000..d43de27dc54 --- /dev/null +++ b/drivers/net/wireless/wl12xx/Makefile @@ -0,0 +1,4 @@ +wl12xx-objs = main.o spi.o event.o tx.o rx.o \ + ps.o cmd.o acx.o boot.o init.o wl1251.o \ + debugfs.o +obj-$(CONFIG_WL12XX) += wl12xx.o diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c new file mode 100644 index 00000000000..1cfd458ad5a --- /dev/null +++ b/drivers/net/wireless/wl12xx/acx.c @@ -0,0 +1,689 @@ +#include "acx.h" + +#include +#include +#include + +#include "wl12xx.h" +#include "wl12xx_80211.h" +#include "reg.h" +#include "spi.h" +#include "ps.h" + +int wl12xx_acx_frame_rates(struct wl12xx *wl, u8 ctrl_rate, u8 ctrl_mod, + u8 mgt_rate, u8 mgt_mod) +{ + int ret; + struct acx_fw_gen_frame_rates rates; + + wl12xx_debug(DEBUG_ACX, "acx frame rates"); + + rates.header.id = ACX_FW_GEN_FRAME_RATES; + rates.header.len = sizeof(struct acx_fw_gen_frame_rates) - + sizeof(struct acx_header); + + rates.tx_ctrl_frame_rate = ctrl_rate; + rates.tx_ctrl_frame_mod = ctrl_mod; + rates.tx_mgt_frame_rate = mgt_rate; + rates.tx_mgt_frame_mod = mgt_mod; + + ret = wl12xx_cmd_configure(wl, &rates, sizeof(rates)); + if (ret < 0) { + wl12xx_error("Failed to set FW rates and modulation"); + return ret; + } + + return 0; +} + + +int wl12xx_acx_station_id(struct wl12xx *wl) +{ + int ret, i; + struct dot11_station_id mac; + + wl12xx_debug(DEBUG_ACX, "acx dot11_station_id"); + + mac.header.id = DOT11_STATION_ID; + mac.header.len = sizeof(mac) - sizeof(struct acx_header); + + for (i = 0; i < ETH_ALEN; i++) + mac.mac[i] = wl->mac_addr[ETH_ALEN - 1 - i]; + + ret = wl12xx_cmd_configure(wl, &mac, sizeof(mac)); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_acx_default_key(struct wl12xx *wl, u8 key_id) +{ + struct acx_dot11_default_key default_key; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx dot11_default_key (%d)", key_id); + + default_key.header.id = DOT11_DEFAULT_KEY; + default_key.header.len = sizeof(default_key) - + sizeof(struct acx_header); + + default_key.id = key_id; + + ret = wl12xx_cmd_configure(wl, &default_key, sizeof(default_key)); + if (ret < 0) { + wl12xx_error("Couldnt set default key"); + return ret; + } + + wl->default_key = key_id; + + return 0; +} + +int wl12xx_acx_wake_up_conditions(struct wl12xx *wl, u8 listen_interval) +{ + struct acx_wake_up_condition wake_up; + + wl12xx_debug(DEBUG_ACX, "acx wake up conditions"); + + wake_up.header.id = ACX_WAKE_UP_CONDITIONS; + wake_up.header.len = sizeof(wake_up) - sizeof(struct acx_header); + + wake_up.wake_up_event = WAKE_UP_EVENT_DTIM_BITMAP; + wake_up.listen_interval = listen_interval; + + return wl12xx_cmd_configure(wl, &wake_up, sizeof(wake_up)); +} + +int wl12xx_acx_sleep_auth(struct wl12xx *wl, u8 sleep_auth) +{ + int ret; + struct acx_sleep_auth auth; + + wl12xx_debug(DEBUG_ACX, "acx sleep auth"); + + auth.header.id = ACX_SLEEP_AUTH; + auth.header.len = sizeof(auth) - sizeof(struct acx_header); + + auth.sleep_auth = sleep_auth; + + ret = wl12xx_cmd_configure(wl, &auth, sizeof(auth)); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_acx_fw_version(struct wl12xx *wl, char *buf, size_t len) +{ + struct wl12xx_command cmd; + struct acx_revision *rev; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx fw rev"); + + memset(&cmd, 0, sizeof(cmd)); + + ret = wl12xx_cmd_interrogate(wl, ACX_FW_REV, sizeof(*rev), &cmd); + if (ret < 0) { + wl12xx_warning("ACX_FW_REV interrogate failed"); + return ret; + } + + rev = (struct acx_revision *) &cmd.parameters; + + /* be careful with the buffer sizes */ + strncpy(buf, rev->fw_version, min(len, sizeof(rev->fw_version))); + + /* + * if the firmware version string is exactly + * sizeof(rev->fw_version) long or fw_len is less than + * sizeof(rev->fw_version) it won't be null terminated + */ + buf[min(len, sizeof(rev->fw_version)) - 1] = '\0'; + + return 0; +} + +int wl12xx_acx_tx_power(struct wl12xx *wl, int power) +{ + struct acx_current_tx_power ie; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx dot11_cur_tx_pwr"); + + if (power < 0 || power > 25) + return -EINVAL; + + memset(&ie, 0, sizeof(ie)); + + ie.header.id = DOT11_CUR_TX_PWR; + ie.header.len = sizeof(ie) - sizeof(struct acx_header); + ie.current_tx_power = power * 10; + + ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie)); + if (ret < 0) { + wl12xx_warning("configure of tx power failed: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_feature_cfg(struct wl12xx *wl) +{ + struct acx_feature_config feature; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx feature cfg"); + + memset(&feature, 0, sizeof(feature)); + + feature.header.id = ACX_FEATURE_CFG; + feature.header.len = sizeof(feature) - sizeof(struct acx_header); + + /* DF_ENCRYPTION_DISABLE and DF_SNIFF_MODE_ENABLE are disabled */ + feature.data_flow_options = 0; + feature.options = 0; + + ret = wl12xx_cmd_configure(wl, &feature, sizeof(feature)); + if (ret < 0) + wl12xx_error("Couldnt set HW encryption"); + + return ret; +} + +int wl12xx_acx_mem_map(struct wl12xx *wl, void *mem_map, size_t len) +{ + struct wl12xx_command cmd; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx mem map"); + + ret = wl12xx_cmd_interrogate(wl, ACX_MEM_MAP, len, &cmd); + if (ret < 0) + return ret; + else if (cmd.status != CMD_STATUS_SUCCESS) + return -EIO; + + memcpy(mem_map, &cmd.parameters, len); + + return 0; +} + +int wl12xx_acx_data_path_params(struct wl12xx *wl, + struct acx_data_path_params_resp *data_path) +{ + struct acx_data_path_params params; + struct wl12xx_command cmd; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx data path params"); + + params.rx_packet_ring_chunk_size = DP_RX_PACKET_RING_CHUNK_SIZE; + params.tx_packet_ring_chunk_size = DP_TX_PACKET_RING_CHUNK_SIZE; + + params.rx_packet_ring_chunk_num = DP_RX_PACKET_RING_CHUNK_NUM; + params.tx_packet_ring_chunk_num = DP_TX_PACKET_RING_CHUNK_NUM; + + params.tx_complete_threshold = 1; + + params.tx_complete_ring_depth = FW_TX_CMPLT_BLOCK_SIZE; + + params.tx_complete_timeout = DP_TX_COMPLETE_TIME_OUT; + + params.header.id = ACX_DATA_PATH_PARAMS; + params.header.len = sizeof(params) - sizeof(struct acx_header); + + ret = wl12xx_cmd_configure(wl, ¶ms, sizeof(params)); + if (ret < 0) + return ret; + + + ret = wl12xx_cmd_interrogate(wl, ACX_DATA_PATH_PARAMS, + sizeof(struct acx_data_path_params_resp), + &cmd); + + if (ret < 0) { + wl12xx_warning("failed to read data path parameters: %d", ret); + return ret; + } else if (cmd.status != CMD_STATUS_SUCCESS) { + wl12xx_warning("data path parameter acx status failed"); + return -EIO; + } + + memcpy(data_path, &cmd.parameters, sizeof(*data_path)); + + return 0; +} + +int wl12xx_acx_rx_msdu_life_time(struct wl12xx *wl, u32 life_time) +{ + struct rx_msdu_lifetime msdu_lifetime; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx rx msdu life time"); + + msdu_lifetime.header.id = DOT11_RX_MSDU_LIFE_TIME; + msdu_lifetime.header.len = sizeof(msdu_lifetime) - + sizeof(struct acx_header); + msdu_lifetime.lifetime = life_time; + + ret = wl12xx_cmd_configure(wl, &msdu_lifetime, sizeof(msdu_lifetime)); + if (ret < 0) { + wl12xx_warning("failed to set rx msdu life time: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_rx_config(struct wl12xx *wl, u32 config, u32 filter) +{ + struct acx_rx_config rx_config; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx rx config"); + + rx_config.header.id = ACX_RX_CFG; + rx_config.header.len = sizeof(rx_config) - sizeof(struct acx_header); + rx_config.config_options = config; + rx_config.filter_options = filter; + + ret = wl12xx_cmd_configure(wl, &rx_config, sizeof(rx_config)); + if (ret < 0) { + wl12xx_warning("failed to set rx config: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_pd_threshold(struct wl12xx *wl) +{ + struct acx_packet_detection packet_detection; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx data pd threshold"); + + /* FIXME: threshold value not set */ + packet_detection.header.id = ACX_PD_THRESHOLD; + packet_detection.header.len = sizeof(packet_detection) - + sizeof(struct acx_header); + + ret = wl12xx_cmd_configure(wl, &packet_detection, + sizeof(packet_detection)); + if (ret < 0) { + wl12xx_warning("failed to set pd threshold: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_slot(struct wl12xx *wl, enum acx_slot_type slot_time) +{ + struct acx_slot slot; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx slot"); + + slot.header.id = ACX_SLOT; + slot.header.len = sizeof(slot) - sizeof(struct acx_header); + + slot.wone_index = STATION_WONE_INDEX; + slot.slot_time = slot_time; + + ret = wl12xx_cmd_configure(wl, &slot, sizeof(slot)); + if (ret < 0) { + wl12xx_warning("failed to set slot time: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_group_address_tbl(struct wl12xx *wl) +{ + struct multicast_grp_addr_start multicast; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx group address tbl"); + + /* MAC filtering */ + multicast.header.id = DOT11_GROUP_ADDRESS_TBL; + multicast.header.len = sizeof(multicast) - sizeof(struct acx_header); + + multicast.enabled = 0; + multicast.num_groups = 0; + memset(multicast.mac_table, 0, ADDRESS_GROUP_MAX_LEN); + + ret = wl12xx_cmd_configure(wl, &multicast, sizeof(multicast)); + if (ret < 0) { + wl12xx_warning("failed to set group addr table: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_service_period_timeout(struct wl12xx *wl) +{ + struct acx_rx_timeout rx_timeout; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx service period timeout"); + + /* RX timeout */ + rx_timeout.header.id = ACX_SERVICE_PERIOD_TIMEOUT; + rx_timeout.header.len = sizeof(rx_timeout) - sizeof(struct acx_header); + + rx_timeout.ps_poll_timeout = RX_TIMEOUT_PS_POLL_DEF; + rx_timeout.upsd_timeout = RX_TIMEOUT_UPSD_DEF; + + ret = wl12xx_cmd_configure(wl, &rx_timeout, sizeof(rx_timeout)); + if (ret < 0) { + wl12xx_warning("failed to set service period timeout: %d", + ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_rts_threshold(struct wl12xx *wl, u16 rts_threshold) +{ + struct acx_rts_threshold rts; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx rts threshold"); + + rts.header.id = DOT11_RTS_THRESHOLD; + rts.header.len = sizeof(rts) - sizeof(struct acx_header); + + rts.threshold = rts_threshold; + + ret = wl12xx_cmd_configure(wl, &rts, sizeof(rts)); + if (ret < 0) { + wl12xx_warning("failed to set rts threshold: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_beacon_filter_opt(struct wl12xx *wl) +{ + struct acx_beacon_filter_option beacon_filter; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx beacon filter opt"); + + beacon_filter.header.id = ACX_BEACON_FILTER_OPT; + beacon_filter.header.len = sizeof(beacon_filter) - + sizeof(struct acx_header); + + beacon_filter.enable = 0; + beacon_filter.max_num_beacons = 0; + + ret = wl12xx_cmd_configure(wl, &beacon_filter, sizeof(beacon_filter)); + if (ret < 0) { + wl12xx_warning("failed to set beacon filter opt: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_beacon_filter_table(struct wl12xx *wl) +{ + struct acx_beacon_filter_ie_table ie_table; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx beacon filter table"); + + ie_table.header.id = ACX_BEACON_FILTER_TABLE; + ie_table.header.len = sizeof(ie_table) - sizeof(struct acx_header); + + ie_table.num_ie = 0; + memset(ie_table.table, 0, BEACON_FILTER_TABLE_MAX_SIZE); + + ret = wl12xx_cmd_configure(wl, &ie_table, sizeof(ie_table)); + if (ret < 0) { + wl12xx_warning("failed to set beacon filter table: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_sg_enable(struct wl12xx *wl) +{ + struct acx_bt_wlan_coex pta; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx sg enable"); + + pta.header.id = ACX_SG_ENABLE; + pta.header.len = sizeof(pta) - sizeof(struct acx_header); + + pta.enable = SG_ENABLE; + + ret = wl12xx_cmd_configure(wl, &pta, sizeof(pta)); + if (ret < 0) { + wl12xx_warning("failed to set softgemini enable: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_sg_cfg(struct wl12xx *wl) +{ + struct acx_bt_wlan_coex_param param; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx sg cfg"); + + /* BT-WLAN coext parameters */ + param.header.id = ACX_SG_CFG; + param.header.len = sizeof(param) - sizeof(struct acx_header); + + param.min_rate = RATE_INDEX_24MBPS; + param.bt_hp_max_time = PTA_BT_HP_MAXTIME_DEF; + param.wlan_hp_max_time = PTA_WLAN_HP_MAX_TIME_DEF; + param.sense_disable_timer = PTA_SENSE_DISABLE_TIMER_DEF; + param.rx_time_bt_hp = PTA_PROTECTIVE_RX_TIME_DEF; + param.tx_time_bt_hp = PTA_PROTECTIVE_TX_TIME_DEF; + param.rx_time_bt_hp_fast = PTA_PROTECTIVE_RX_TIME_FAST_DEF; + param.tx_time_bt_hp_fast = PTA_PROTECTIVE_TX_TIME_FAST_DEF; + param.wlan_cycle_fast = PTA_CYCLE_TIME_FAST_DEF; + param.bt_anti_starvation_period = PTA_ANTI_STARVE_PERIOD_DEF; + param.next_bt_lp_packet = PTA_TIMEOUT_NEXT_BT_LP_PACKET_DEF; + param.wake_up_beacon = PTA_TIME_BEFORE_BEACON_DEF; + param.hp_dm_max_guard_time = PTA_HPDM_MAX_TIME_DEF; + param.next_wlan_packet = PTA_TIME_OUT_NEXT_WLAN_DEF; + param.antenna_type = PTA_ANTENNA_TYPE_DEF; + param.signal_type = PTA_SIGNALING_TYPE_DEF; + param.afh_leverage_on = PTA_AFH_LEVERAGE_ON_DEF; + param.quiet_cycle_num = PTA_NUMBER_QUIET_CYCLE_DEF; + param.max_cts = PTA_MAX_NUM_CTS_DEF; + param.wlan_packets_num = PTA_NUMBER_OF_WLAN_PACKETS_DEF; + param.bt_packets_num = PTA_NUMBER_OF_BT_PACKETS_DEF; + param.missed_rx_avalanche = PTA_RX_FOR_AVALANCHE_DEF; + param.wlan_elp_hp = PTA_ELP_HP_DEF; + param.bt_anti_starvation_cycles = PTA_ANTI_STARVE_NUM_CYCLE_DEF; + param.ack_mode_dual_ant = PTA_ACK_MODE_DEF; + param.pa_sd_enable = PTA_ALLOW_PA_SD_DEF; + param.pta_auto_mode_enable = PTA_AUTO_MODE_NO_CTS_DEF; + param.bt_hp_respected_num = PTA_BT_HP_RESPECTED_DEF; + + ret = wl12xx_cmd_configure(wl, ¶m, sizeof(param)); + if (ret < 0) { + wl12xx_warning("failed to set sg config: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_cca_threshold(struct wl12xx *wl) +{ + struct acx_energy_detection detection; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx cca threshold"); + + detection.header.id = ACX_CCA_THRESHOLD; + detection.header.len = sizeof(detection) - sizeof(struct acx_header); + + detection.rx_cca_threshold = CCA_THRSH_DISABLE_ENERGY_D; + detection.tx_energy_detection = 0; + + ret = wl12xx_cmd_configure(wl, &detection, sizeof(detection)); + if (ret < 0) { + wl12xx_warning("failed to set cca threshold: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_bcn_dtim_options(struct wl12xx *wl) +{ + struct acx_beacon_broadcast bb; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx bcn dtim options"); + + bb.header.id = ACX_BCN_DTIM_OPTIONS; + bb.header.len = sizeof(bb) - sizeof(struct acx_header); + + bb.beacon_rx_timeout = BCN_RX_TIMEOUT_DEF_VALUE; + bb.broadcast_timeout = BROADCAST_RX_TIMEOUT_DEF_VALUE; + bb.rx_broadcast_in_ps = RX_BROADCAST_IN_PS_DEF_VALUE; + bb.ps_poll_threshold = CONSECUTIVE_PS_POLL_FAILURE_DEF; + + ret = wl12xx_cmd_configure(wl, &bb, sizeof(bb)); + if (ret < 0) { + wl12xx_warning("failed to set rx config: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_aid(struct wl12xx *wl, u16 aid) +{ + struct acx_aid acx_aid; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx aid"); + + acx_aid.header.id = ACX_AID; + acx_aid.header.len = sizeof(acx_aid) - sizeof(struct acx_header); + + acx_aid.aid = aid; + + ret = wl12xx_cmd_configure(wl, &acx_aid, sizeof(acx_aid)); + if (ret < 0) { + wl12xx_warning("failed to set aid: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_event_mbox_mask(struct wl12xx *wl, u32 event_mask) +{ + struct acx_event_mask mask; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx event mbox mask"); + + mask.header.id = ACX_EVENT_MBOX_MASK; + mask.header.len = sizeof(mask) - sizeof(struct acx_header); + + /* high event mask is unused */ + mask.high_event_mask = 0xffffffff; + + mask.event_mask = event_mask; + + ret = wl12xx_cmd_configure(wl, &mask, sizeof(mask)); + if (ret < 0) { + wl12xx_warning("failed to set aid: %d", ret); + return ret; + } + + return 0; +} + +int wl12xx_acx_set_preamble(struct wl12xx *wl, enum acx_preamble_type preamble) +{ + struct acx_preamble ie; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx_set_preamble"); + + memset(&ie, 0, sizeof(ie)); + + ie.header.id = ACX_PREAMBLE_TYPE; + ie.header.len = sizeof(ie) - sizeof(struct acx_header); + ie.preamble = preamble; + ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie)); + if (ret < 0) { + wl12xx_warning("Setting of preamble failed: %d", ret); + return ret; + } + return 0; +} + +int wl12xx_acx_cts_protect(struct wl12xx *wl, + enum acx_ctsprotect_type ctsprotect) +{ + struct acx_ctsprotect ie; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx_set_ctsprotect"); + + memset(&ie, 0, sizeof(ie)); + + ie.header.id = ACX_CTS_PROTECTION; + ie.header.len = sizeof(ie) - sizeof(struct acx_header); + ie.ctsprotect = ctsprotect; + ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie)); + if (ret < 0) { + wl12xx_warning("Setting of ctsprotect failed: %d", ret); + return ret; + } + return 0; +} + +int wl12xx_acx_statistics(struct wl12xx *wl, struct acx_statistics *stats) +{ + struct wl12xx_command *answer; + int ret; + + wl12xx_debug(DEBUG_ACX, "acx statistics"); + + answer = kmalloc(sizeof(*answer), GFP_KERNEL); + if (!answer) { + wl12xx_warning("could not allocate memory for acx statistics"); + ret = -ENOMEM; + goto out; + } + + ret = wl12xx_cmd_interrogate(wl, ACX_STATISTICS, sizeof(*answer), + answer); + if (ret < 0) { + wl12xx_warning("acx statistics failed: %d", ret); + goto out; + } + + memcpy(stats, answer->parameters, sizeof(*stats)); + +out: + kfree(answer); + return ret; +} diff --git a/drivers/net/wireless/wl12xx/acx.h b/drivers/net/wireless/wl12xx/acx.h new file mode 100644 index 00000000000..fb2d2340993 --- /dev/null +++ b/drivers/net/wireless/wl12xx/acx.h @@ -0,0 +1,1245 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_ACX_H__ +#define __WL12XX_ACX_H__ + +#include "wl12xx.h" + +/* Target's information element */ +struct acx_header { + u16 id; + u16 len; +}; + +struct acx_error_counter { + struct acx_header header; + + /* The number of PLCP errors since the last time this */ + /* information element was interrogated. This field is */ + /* automatically cleared when it is interrogated.*/ + u32 PLCP_error; + + /* The number of FCS errors since the last time this */ + /* information element was interrogated. This field is */ + /* automatically cleared when it is interrogated.*/ + u32 FCS_error; + + /* The number of MPDUs without PLCP header errors received*/ + /* since the last time this information element was interrogated. */ + /* This field is automatically cleared when it is interrogated.*/ + u32 valid_frame; + + /* the number of missed sequence numbers in the squentially */ + /* values of frames seq numbers */ + u32 seq_num_miss; +} __attribute__ ((packed)); + +struct acx_revision { + struct acx_header header; + + /* + * The WiLink firmware version, an ASCII string x.x.x.x, + * that uniquely identifies the current firmware. + * The left most digit is incremented each time a + * significant change is made to the firmware, such as + * code redesign or new platform support. + * The second digit is incremented when major enhancements + * are added or major fixes are made. + * The third digit is incremented for each GA release. + * The fourth digit is incremented for each build. + * The first two digits identify a firmware release version, + * in other words, a unique set of features. + * The first three digits identify a GA release. + */ + char fw_version[20]; + + /* + * This 4 byte field specifies the WiLink hardware version. + * bits 0 - 15: Reserved. + * bits 16 - 23: Version ID - The WiLink version ID + * (1 = first spin, 2 = second spin, and so on). + * bits 24 - 31: Chip ID - The WiLink chip ID. + */ + u32 hw_version; +} __attribute__ ((packed)); + +enum wl12xx_psm_mode { + /* Active mode */ + WL12XX_PSM_CAM = 0, + + /* Power save mode */ + WL12XX_PSM_PS = 1, + + /* Extreme low power */ + WL12XX_PSM_ELP = 2, +}; + +struct acx_sleep_auth { + struct acx_header header; + + /* The sleep level authorization of the device. */ + /* 0 - Always active*/ + /* 1 - Power down mode: light / fast sleep*/ + /* 2 - ELP mode: Deep / Max sleep*/ + u8 sleep_auth; + u8 padding[3]; +} __attribute__ ((packed)); + +#define TIM_ELE_ID 5 +#define PARTIAL_VBM_MAX 251 + +struct tim { + u8 identity; + u8 length; + u8 dtim_count; + u8 dtim_period; + u8 bitmap_ctrl; + u8 pvb_field[PARTIAL_VBM_MAX]; /* Partial Virtual Bitmap */ +} __attribute__ ((packed)); + +/* Virtual Bit Map update */ +struct vbm_update_request { + __le16 len; + u8 padding[2]; + struct tim tim; +} __attribute__ ((packed)); + +enum { + HOSTIF_PCI_MASTER_HOST_INDIRECT, + HOSTIF_PCI_MASTER_HOST_DIRECT, + HOSTIF_SLAVE, + HOSTIF_PKT_RING, + HOSTIF_DONTCARE = 0xFF +}; + +#define DEFAULT_UCAST_PRIORITY 0 +#define DEFAULT_RX_Q_PRIORITY 0 +#define DEFAULT_NUM_STATIONS 1 +#define DEFAULT_RXQ_PRIORITY 0 /* low 0 .. 15 high */ +#define DEFAULT_RXQ_TYPE 0x07 /* All frames, Data/Ctrl/Mgmt */ +#define TRACE_BUFFER_MAX_SIZE 256 + +#define DP_RX_PACKET_RING_CHUNK_SIZE 1600 +#define DP_TX_PACKET_RING_CHUNK_SIZE 1600 +#define DP_RX_PACKET_RING_CHUNK_NUM 2 +#define DP_TX_PACKET_RING_CHUNK_NUM 2 +#define DP_TX_COMPLETE_TIME_OUT 20 +#define FW_TX_CMPLT_BLOCK_SIZE 16 + +struct acx_data_path_params { + struct acx_header header; + + u16 rx_packet_ring_chunk_size; + u16 tx_packet_ring_chunk_size; + + u8 rx_packet_ring_chunk_num; + u8 tx_packet_ring_chunk_num; + + /* + * Maximum number of packets that can be gathered + * in the TX complete ring before an interrupt + * is generated. + */ + u8 tx_complete_threshold; + + /* Number of pending TX complete entries in cyclic ring.*/ + u8 tx_complete_ring_depth; + + /* + * Max num microseconds since a packet enters the TX + * complete ring until an interrupt is generated. + */ + u32 tx_complete_timeout; +} __attribute__ ((packed)); + + +struct acx_data_path_params_resp { + struct acx_header header; + + u16 rx_packet_ring_chunk_size; + u16 tx_packet_ring_chunk_size; + + u8 rx_packet_ring_chunk_num; + u8 tx_packet_ring_chunk_num; + + u8 pad[2]; + + u32 rx_packet_ring_addr; + u32 tx_packet_ring_addr; + + u32 rx_control_addr; + u32 tx_control_addr; + + u32 tx_complete_addr; +} __attribute__ ((packed)); + +#define TX_MSDU_LIFETIME_MIN 0 +#define TX_MSDU_LIFETIME_MAX 3000 +#define TX_MSDU_LIFETIME_DEF 512 +#define RX_MSDU_LIFETIME_MIN 0 +#define RX_MSDU_LIFETIME_MAX 0xFFFFFFFF +#define RX_MSDU_LIFETIME_DEF 512000 + +struct rx_msdu_lifetime { + struct acx_header header; + + /* + * The maximum amount of time, in TU, before the + * firmware discards the MSDU. + */ + u32 lifetime; +} __attribute__ ((packed)); + +/* + * RX Config Options Table + * Bit Definition + * === ========== + * 31:14 Reserved + * 13 Copy RX Status - when set, write three receive status words + * to top of rx'd MPDUs. + * When cleared, do not write three status words (added rev 1.5) + * 12 Reserved + * 11 RX Complete upon FCS error - when set, give rx complete + * interrupt for FCS errors, after the rx filtering, e.g. unicast + * frames not to us with FCS error will not generate an interrupt. + * 10 SSID Filter Enable - When set, the WiLink discards all beacon, + * probe request, and probe response frames with an SSID that does + * not match the SSID specified by the host in the START/JOIN + * command. + * When clear, the WiLink receives frames with any SSID. + * 9 Broadcast Filter Enable - When set, the WiLink discards all + * broadcast frames. When clear, the WiLink receives all received + * broadcast frames. + * 8:6 Reserved + * 5 BSSID Filter Enable - When set, the WiLink discards any frames + * with a BSSID that does not match the BSSID specified by the + * host. + * When clear, the WiLink receives frames from any BSSID. + * 4 MAC Addr Filter - When set, the WiLink discards any frames + * with a destination address that does not match the MAC address + * of the adaptor. + * When clear, the WiLink receives frames destined to any MAC + * address. + * 3 Promiscuous - When set, the WiLink receives all valid frames + * (i.e., all frames that pass the FCS check). + * When clear, only frames that pass the other filters specified + * are received. + * 2 FCS - When set, the WiLink includes the FCS with the received + * frame. + * When cleared, the FCS is discarded. + * 1 PLCP header - When set, write all data from baseband to frame + * buffer including PHY header. + * 0 Reserved - Always equal to 0. + * + * RX Filter Options Table + * Bit Definition + * === ========== + * 31:12 Reserved - Always equal to 0. + * 11 Association - When set, the WiLink receives all association + * related frames (association request/response, reassocation + * request/response, and disassociation). When clear, these frames + * are discarded. + * 10 Auth/De auth - When set, the WiLink receives all authentication + * and de-authentication frames. When clear, these frames are + * discarded. + * 9 Beacon - When set, the WiLink receives all beacon frames. + * When clear, these frames are discarded. + * 8 Contention Free - When set, the WiLink receives all contention + * free frames. + * When clear, these frames are discarded. + * 7 Control - When set, the WiLink receives all control frames. + * When clear, these frames are discarded. + * 6 Data - When set, the WiLink receives all data frames. + * When clear, these frames are discarded. + * 5 FCS Error - When set, the WiLink receives frames that have FCS + * errors. + * When clear, these frames are discarded. + * 4 Management - When set, the WiLink receives all management + * frames. + * When clear, these frames are discarded. + * 3 Probe Request - When set, the WiLink receives all probe request + * frames. + * When clear, these frames are discarded. + * 2 Probe Response - When set, the WiLink receives all probe + * response frames. + * When clear, these frames are discarded. + * 1 RTS/CTS/ACK - When set, the WiLink receives all RTS, CTS and ACK + * frames. + * When clear, these frames are discarded. + * 0 Rsvd Type/Sub Type - When set, the WiLink receives all frames + * that have reserved frame types and sub types as defined by the + * 802.11 specification. + * When clear, these frames are discarded. + */ +struct acx_rx_config { + struct acx_header header; + + u32 config_options; + u32 filter_options; +} __attribute__ ((packed)); + +enum { + QOS_AC_BE = 0, + QOS_AC_BK, + QOS_AC_VI, + QOS_AC_VO, + QOS_HIGHEST_AC_INDEX = QOS_AC_VO, +}; + +#define MAX_NUM_OF_AC (QOS_HIGHEST_AC_INDEX+1) +#define FIRST_AC_INDEX QOS_AC_BE +#define MAX_NUM_OF_802_1d_TAGS 8 +#define AC_PARAMS_MAX_TSID 15 +#define MAX_APSD_CONF 0xffff + +#define QOS_TX_HIGH_MIN (0) +#define QOS_TX_HIGH_MAX (100) + +#define QOS_TX_HIGH_BK_DEF (25) +#define QOS_TX_HIGH_BE_DEF (35) +#define QOS_TX_HIGH_VI_DEF (35) +#define QOS_TX_HIGH_VO_DEF (35) + +#define QOS_TX_LOW_BK_DEF (15) +#define QOS_TX_LOW_BE_DEF (25) +#define QOS_TX_LOW_VI_DEF (25) +#define QOS_TX_LOW_VO_DEF (25) + +struct acx_tx_queue_qos_config { + struct acx_header header; + + u8 qid; + u8 pad[3]; + + /* Max number of blocks allowd in the queue */ + u16 high_threshold; + + /* Lowest memory blocks guaranteed for this queue */ + u16 low_threshold; +} __attribute__ ((packed)); + +struct acx_packet_detection { + struct acx_header header; + + u32 threshold; +} __attribute__ ((packed)); + + +enum acx_slot_type { + SLOT_TIME_LONG = 0, + SLOT_TIME_SHORT = 1, + DEFAULT_SLOT_TIME = SLOT_TIME_SHORT, + MAX_SLOT_TIMES = 0xFF +}; + +#define STATION_WONE_INDEX 0 + +struct acx_slot { + struct acx_header header; + + u8 wone_index; /* Reserved */ + u8 slot_time; + u8 reserved[6]; +} __attribute__ ((packed)); + + +#define ADDRESS_GROUP_MAX (8) +#define ADDRESS_GROUP_MAX_LEN (ETH_ALEN * ADDRESS_GROUP_MAX) + +struct multicast_grp_addr_start { + struct acx_header header; + + u8 enabled; + u8 num_groups; + u8 pad[2]; + u8 mac_table[ADDRESS_GROUP_MAX_LEN]; +} __attribute__ ((packed)); + + +#define RX_TIMEOUT_PS_POLL_MIN 0 +#define RX_TIMEOUT_PS_POLL_MAX (200000) +#define RX_TIMEOUT_PS_POLL_DEF (15) +#define RX_TIMEOUT_UPSD_MIN 0 +#define RX_TIMEOUT_UPSD_MAX (200000) +#define RX_TIMEOUT_UPSD_DEF (15) + +struct acx_rx_timeout { + struct acx_header header; + + /* + * The longest time the STA will wait to receive + * traffic from the AP after a PS-poll has been + * transmitted. + */ + u16 ps_poll_timeout; + + /* + * The longest time the STA will wait to receive + * traffic from the AP after a frame has been sent + * from an UPSD enabled queue. + */ + u16 upsd_timeout; +} __attribute__ ((packed)); + +#define RTS_THRESHOLD_MIN 0 +#define RTS_THRESHOLD_MAX 4096 +#define RTS_THRESHOLD_DEF 2347 + +struct acx_rts_threshold { + struct acx_header header; + + u16 threshold; + u8 pad[2]; +} __attribute__ ((packed)); + +struct acx_beacon_filter_option { + struct acx_header header; + + u8 enable; + + /* + * The number of beacons without the unicast TIM + * bit set that the firmware buffers before + * signaling the host about ready frames. + * When set to 0 and the filter is enabled, beacons + * without the unicast TIM bit set are dropped. + */ + u8 max_num_beacons; + u8 pad[2]; +} __attribute__ ((packed)); + +/* + * ACXBeaconFilterEntry (not 221) + * Byte Offset Size (Bytes) Definition + * =========== ============ ========== + * 0 1 IE identifier + * 1 1 Treatment bit mask + * + * ACXBeaconFilterEntry (221) + * Byte Offset Size (Bytes) Definition + * =========== ============ ========== + * 0 1 IE identifier + * 1 1 Treatment bit mask + * 2 3 OUI + * 5 1 Type + * 6 2 Version + * + * + * Treatment bit mask - The information element handling: + * bit 0 - The information element is compared and transferred + * in case of change. + * bit 1 - The information element is transferred to the host + * with each appearance or disappearance. + * Note that both bits can be set at the same time. + */ +#define BEACON_FILTER_TABLE_MAX_IE_NUM (32) +#define BEACON_FILTER_TABLE_MAX_VENDOR_SPECIFIC_IE_NUM (6) +#define BEACON_FILTER_TABLE_IE_ENTRY_SIZE (2) +#define BEACON_FILTER_TABLE_EXTRA_VENDOR_SPECIFIC_IE_SIZE (6) +#define BEACON_FILTER_TABLE_MAX_SIZE ((BEACON_FILTER_TABLE_MAX_IE_NUM * \ + BEACON_FILTER_TABLE_IE_ENTRY_SIZE) + \ + (BEACON_FILTER_TABLE_MAX_VENDOR_SPECIFIC_IE_NUM * \ + BEACON_FILTER_TABLE_EXTRA_VENDOR_SPECIFIC_IE_SIZE)) + +struct acx_beacon_filter_ie_table { + struct acx_header header; + + u8 num_ie; + u8 table[BEACON_FILTER_TABLE_MAX_SIZE]; + u8 pad[3]; +} __attribute__ ((packed)); + +enum { + SG_ENABLE = 0, + SG_DISABLE, + SG_SENSE_NO_ACTIVITY, + SG_SENSE_ACTIVE +}; + +struct acx_bt_wlan_coex { + struct acx_header header; + + /* + * 0 -> PTA enabled + * 1 -> PTA disabled + * 2 -> sense no active mode, i.e. + * an interrupt is sent upon + * BT activity. + * 3 -> PTA is switched on in response + * to the interrupt sending. + */ + u8 enable; + u8 pad[3]; +} __attribute__ ((packed)); + +#define PTA_ANTENNA_TYPE_DEF (0) +#define PTA_BT_HP_MAXTIME_DEF (2000) +#define PTA_WLAN_HP_MAX_TIME_DEF (5000) +#define PTA_SENSE_DISABLE_TIMER_DEF (1350) +#define PTA_PROTECTIVE_RX_TIME_DEF (1500) +#define PTA_PROTECTIVE_TX_TIME_DEF (1500) +#define PTA_TIMEOUT_NEXT_BT_LP_PACKET_DEF (3000) +#define PTA_SIGNALING_TYPE_DEF (1) +#define PTA_AFH_LEVERAGE_ON_DEF (0) +#define PTA_NUMBER_QUIET_CYCLE_DEF (0) +#define PTA_MAX_NUM_CTS_DEF (3) +#define PTA_NUMBER_OF_WLAN_PACKETS_DEF (2) +#define PTA_NUMBER_OF_BT_PACKETS_DEF (2) +#define PTA_PROTECTIVE_RX_TIME_FAST_DEF (1500) +#define PTA_PROTECTIVE_TX_TIME_FAST_DEF (3000) +#define PTA_CYCLE_TIME_FAST_DEF (8700) +#define PTA_RX_FOR_AVALANCHE_DEF (5) +#define PTA_ELP_HP_DEF (0) +#define PTA_ANTI_STARVE_PERIOD_DEF (500) +#define PTA_ANTI_STARVE_NUM_CYCLE_DEF (4) +#define PTA_ALLOW_PA_SD_DEF (1) +#define PTA_TIME_BEFORE_BEACON_DEF (6300) +#define PTA_HPDM_MAX_TIME_DEF (1600) +#define PTA_TIME_OUT_NEXT_WLAN_DEF (2550) +#define PTA_AUTO_MODE_NO_CTS_DEF (0) +#define PTA_BT_HP_RESPECTED_DEF (3) +#define PTA_WLAN_RX_MIN_RATE_DEF (24) +#define PTA_ACK_MODE_DEF (1) + +struct acx_bt_wlan_coex_param { + struct acx_header header; + + /* + * The minimum rate of a received WLAN packet in the STA, + * during protective mode, of which a new BT-HP request + * during this Rx will always be respected and gain the antenna. + */ + u32 min_rate; + + /* Max time the BT HP will be respected. */ + u16 bt_hp_max_time; + + /* Max time the WLAN HP will be respected. */ + u16 wlan_hp_max_time; + + /* + * The time between the last BT activity + * and the moment when the sense mode returns + * to SENSE_INACTIVE. + */ + u16 sense_disable_timer; + + /* Time before the next BT HP instance */ + u16 rx_time_bt_hp; + u16 tx_time_bt_hp; + + /* range: 10-20000 default: 1500 */ + u16 rx_time_bt_hp_fast; + u16 tx_time_bt_hp_fast; + + /* range: 2000-65535 default: 8700 */ + u16 wlan_cycle_fast; + + /* range: 0 - 15000 (Msec) default: 1000 */ + u16 bt_anti_starvation_period; + + /* range 400-10000(Usec) default: 3000 */ + u16 next_bt_lp_packet; + + /* Deafult: worst case for BT DH5 traffic */ + u16 wake_up_beacon; + + /* range: 0-50000(Usec) default: 1050 */ + u16 hp_dm_max_guard_time; + + /* + * This is to prevent both BT & WLAN antenna + * starvation. + * Range: 100-50000(Usec) default:2550 + */ + u16 next_wlan_packet; + + /* 0 -> shared antenna */ + u8 antenna_type; + + /* + * 0 -> TI legacy + * 1 -> Palau + */ + u8 signal_type; + + /* + * BT AFH status + * 0 -> no AFH + * 1 -> from dedicated GPIO + * 2 -> AFH on (from host) + */ + u8 afh_leverage_on; + + /* + * The number of cycles during which no + * TX will be sent after 1 cycle of RX + * transaction in protective mode + */ + u8 quiet_cycle_num; + + /* + * The maximum number of CTSs that will + * be sent for receiving RX packet in + * protective mode + */ + u8 max_cts; + + /* + * The number of WLAN packets + * transferred in common mode before + * switching to BT. + */ + u8 wlan_packets_num; + + /* + * The number of BT packets + * transferred in common mode before + * switching to WLAN. + */ + u8 bt_packets_num; + + /* range: 1-255 default: 5 */ + u8 missed_rx_avalanche; + + /* range: 0-1 default: 1 */ + u8 wlan_elp_hp; + + /* range: 0 - 15 default: 4 */ + u8 bt_anti_starvation_cycles; + + u8 ack_mode_dual_ant; + + /* + * Allow PA_SD assertion/de-assertion + * during enabled BT activity. + */ + u8 pa_sd_enable; + + /* + * Enable/Disable PTA in auto mode: + * Support Both Active & P.S modes + */ + u8 pta_auto_mode_enable; + + /* range: 0 - 20 default: 1 */ + u8 bt_hp_respected_num; +} __attribute__ ((packed)); + +#define CCA_THRSH_ENABLE_ENERGY_D 0x140A +#define CCA_THRSH_DISABLE_ENERGY_D 0xFFEF + +struct acx_energy_detection { + struct acx_header header; + + /* The RX Clear Channel Assessment threshold in the PHY */ + u16 rx_cca_threshold; + u8 tx_energy_detection; + u8 pad; +} __attribute__ ((packed)); + +#define BCN_RX_TIMEOUT_DEF_VALUE 10000 +#define BROADCAST_RX_TIMEOUT_DEF_VALUE 20000 +#define RX_BROADCAST_IN_PS_DEF_VALUE 1 +#define CONSECUTIVE_PS_POLL_FAILURE_DEF 4 + +struct acx_beacon_broadcast { + struct acx_header header; + + u16 beacon_rx_timeout; + u16 broadcast_timeout; + + /* Enables receiving of broadcast packets in PS mode */ + u8 rx_broadcast_in_ps; + + /* Consecutive PS Poll failures before updating the host */ + u8 ps_poll_threshold; + u8 pad[2]; +} __attribute__ ((packed)); + +struct acx_event_mask { + struct acx_header header; + + u32 event_mask; + u32 high_event_mask; /* Unused */ +} __attribute__ ((packed)); + +#define CFG_RX_FCS BIT(2) +#define CFG_RX_ALL_GOOD BIT(3) +#define CFG_UNI_FILTER_EN BIT(4) +#define CFG_BSSID_FILTER_EN BIT(5) +#define CFG_MC_FILTER_EN BIT(6) +#define CFG_MC_ADDR0_EN BIT(7) +#define CFG_MC_ADDR1_EN BIT(8) +#define CFG_BC_REJECT_EN BIT(9) +#define CFG_SSID_FILTER_EN BIT(10) +#define CFG_RX_INT_FCS_ERROR BIT(11) +#define CFG_RX_INT_ENCRYPTED BIT(12) +#define CFG_RX_WR_RX_STATUS BIT(13) +#define CFG_RX_FILTER_NULTI BIT(14) +#define CFG_RX_RESERVE BIT(15) +#define CFG_RX_TIMESTAMP_TSF BIT(16) + +#define CFG_RX_RSV_EN BIT(0) +#define CFG_RX_RCTS_ACK BIT(1) +#define CFG_RX_PRSP_EN BIT(2) +#define CFG_RX_PREQ_EN BIT(3) +#define CFG_RX_MGMT_EN BIT(4) +#define CFG_RX_FCS_ERROR BIT(5) +#define CFG_RX_DATA_EN BIT(6) +#define CFG_RX_CTL_EN BIT(7) +#define CFG_RX_CF_EN BIT(8) +#define CFG_RX_BCN_EN BIT(9) +#define CFG_RX_AUTH_EN BIT(10) +#define CFG_RX_ASSOC_EN BIT(11) + +#define SCAN_PASSIVE BIT(0) +#define SCAN_5GHZ_BAND BIT(1) +#define SCAN_TRIGGERED BIT(2) +#define SCAN_PRIORITY_HIGH BIT(3) + +struct acx_fw_gen_frame_rates { + struct acx_header header; + + u8 tx_ctrl_frame_rate; /* RATE_* */ + u8 tx_ctrl_frame_mod; /* CCK_* or PBCC_* */ + u8 tx_mgt_frame_rate; + u8 tx_mgt_frame_mod; +} __attribute__ ((packed)); + +/* STA MAC */ +struct dot11_station_id { + struct acx_header header; + + u8 mac[ETH_ALEN]; + u8 pad[2]; +} __attribute__ ((packed)); + +/* HW encryption keys */ +#define NUM_ACCESS_CATEGORIES_COPY 4 +#define MAX_KEY_SIZE 32 + +/* When set, disable HW encryption */ +#define DF_ENCRYPTION_DISABLE 0x01 +/* When set, disable HW decryption */ +#define DF_SNIFF_MODE_ENABLE 0x80 + +struct acx_feature_config { + struct acx_header header; + + u32 options; + u32 data_flow_options; +} __attribute__ ((packed)); + +enum acx_key_action { + KEY_ADD_OR_REPLACE = 1, + KEY_REMOVE = 2, + KEY_SET_ID = 3, + MAX_KEY_ACTION = 0xffff, +}; + +enum acx_key_type { + KEY_WEP_DEFAULT = 0, + KEY_WEP_ADDR = 1, + KEY_AES_GROUP = 4, + KEY_AES_PAIRWISE = 5, + KEY_WEP_GROUP = 6, + KEY_TKIP_MIC_GROUP = 10, + KEY_TKIP_MIC_PAIRWISE = 11, +}; + +/* + * + * key_type_e key size key format + * ---------- --------- ---------- + * 0x00 5, 13, 29 Key data + * 0x01 5, 13, 29 Key data + * 0x04 16 16 bytes of key data + * 0x05 16 16 bytes of key data + * 0x0a 32 16 bytes of TKIP key data + * 8 bytes of RX MIC key data + * 8 bytes of TX MIC key data + * 0x0b 32 16 bytes of TKIP key data + * 8 bytes of RX MIC key data + * 8 bytes of TX MIC key data + * + */ + +struct acx_set_key { + /* Ignored for default WEP key */ + u8 addr[ETH_ALEN]; + + /* key_action_e */ + u16 key_action; + + u16 reserved_1; + + /* key size in bytes */ + u8 key_size; + + /* key_type_e */ + u8 key_type; + u8 ssid_profile; + + /* + * TKIP, AES: frame's key id field. + * For WEP default key: key id; + */ + u8 id; + u8 reserved_2[6]; + u8 key[MAX_KEY_SIZE]; + u16 ac_seq_num16[NUM_ACCESS_CATEGORIES_COPY]; + u32 ac_seq_num32[NUM_ACCESS_CATEGORIES_COPY]; +} __attribute__ ((packed)); + +struct acx_current_tx_power { + struct acx_header header; + + u8 current_tx_power; + u8 padding[3]; +} __attribute__ ((packed)); + +struct acx_dot11_default_key { + struct acx_header header; + + u8 id; + u8 pad[3]; +} __attribute__ ((packed)); + +struct acx_tsf_info { + struct acx_header header; + + u32 current_tsf_msb; + u32 current_tsf_lsb; + u32 last_TBTT_msb; + u32 last_TBTT_lsb; + u8 last_dtim_count; + u8 pad[3]; +} __attribute__ ((packed)); + +/* 802.11 PS */ +enum acx_ps_mode { + STATION_ACTIVE_MODE, + STATION_POWER_SAVE_MODE +}; + +struct acx_ps_params { + u8 ps_mode; /* STATION_* */ + u8 send_null_data; /* Do we have to send NULL data packet ? */ + u8 retries; /* Number of retires for the initial NULL data packet */ + + /* + * TUs during which the target stays awake after switching + * to power save mode. + */ + u8 hang_over_period; + u16 null_data_rate; + u8 pad[2]; +} __attribute__ ((packed)); + +enum acx_wake_up_event { + WAKE_UP_EVENT_BEACON_BITMAP = 0x01, /* Wake on every Beacon*/ + WAKE_UP_EVENT_DTIM_BITMAP = 0x02, /* Wake on every DTIM*/ + WAKE_UP_EVENT_N_DTIM_BITMAP = 0x04, /* Wake on every Nth DTIM */ + WAKE_UP_EVENT_N_BEACONS_BITMAP = 0x08, /* Wake on every Nth Beacon */ + WAKE_UP_EVENT_BITS_MASK = 0x0F +}; + +struct acx_wake_up_condition { + struct acx_header header; + + u8 wake_up_event; /* Only one bit can be set */ + u8 listen_interval; + u8 pad[2]; +} __attribute__ ((packed)); + +struct acx_aid { + struct acx_header header; + + /* + * To be set when associated with an AP. + */ + u16 aid; + u8 pad[2]; +} __attribute__ ((packed)); + +enum acx_preamble_type { + ACX_PREAMBLE_LONG = 0, + ACX_PREAMBLE_SHORT = 1 +}; + +struct acx_preamble { + struct acx_header header; + /* + * When set, the WiLink transmits the frames with a short preamble and + * when cleared, the WiLink transmits the frames with a long preamble. + */ + u8 preamble; + u8 padding[3]; +} __attribute__ ((packed)); + +enum acx_ctsprotect_type { + CTSPROTECT_DISABLE = 0, + CTSPROTECT_ENABLE = 1 +}; + +struct acx_ctsprotect { + struct acx_header header; + u8 ctsprotect; + u8 padding[3]; +} __attribute__ ((packed)); + +struct acx_tx_statistics { + u32 internal_desc_overflow; +} __attribute__ ((packed)); + +struct acx_rx_statistics { + u32 out_of_mem; + u32 hdr_overflow; + u32 hw_stuck; + u32 dropped; + u32 fcs_err; + u32 xfr_hint_trig; + u32 path_reset; + u32 reset_counter; +} __attribute__ ((packed)); + +struct acx_dma_statistics { + u32 rx_requested; + u32 rx_errors; + u32 tx_requested; + u32 tx_errors; +} __attribute__ ((packed)); + +struct acx_isr_statistics { + /* host command complete */ + u32 cmd_cmplt; + + /* fiqisr() */ + u32 fiqs; + + /* (INT_STS_ND & INT_TRIG_RX_HEADER) */ + u32 rx_headers; + + /* (INT_STS_ND & INT_TRIG_RX_CMPLT) */ + u32 rx_completes; + + /* (INT_STS_ND & INT_TRIG_NO_RX_BUF) */ + u32 rx_mem_overflow; + + /* (INT_STS_ND & INT_TRIG_S_RX_RDY) */ + u32 rx_rdys; + + /* irqisr() */ + u32 irqs; + + /* (INT_STS_ND & INT_TRIG_TX_PROC) */ + u32 tx_procs; + + /* (INT_STS_ND & INT_TRIG_DECRYPT_DONE) */ + u32 decrypt_done; + + /* (INT_STS_ND & INT_TRIG_DMA0) */ + u32 dma0_done; + + /* (INT_STS_ND & INT_TRIG_DMA1) */ + u32 dma1_done; + + /* (INT_STS_ND & INT_TRIG_TX_EXC_CMPLT) */ + u32 tx_exch_complete; + + /* (INT_STS_ND & INT_TRIG_COMMAND) */ + u32 commands; + + /* (INT_STS_ND & INT_TRIG_RX_PROC) */ + u32 rx_procs; + + /* (INT_STS_ND & INT_TRIG_PM_802) */ + u32 hw_pm_mode_changes; + + /* (INT_STS_ND & INT_TRIG_ACKNOWLEDGE) */ + u32 host_acknowledges; + + /* (INT_STS_ND & INT_TRIG_PM_PCI) */ + u32 pci_pm; + + /* (INT_STS_ND & INT_TRIG_ACM_WAKEUP) */ + u32 wakeups; + + /* (INT_STS_ND & INT_TRIG_LOW_RSSI) */ + u32 low_rssi; +} __attribute__ ((packed)); + +struct acx_wep_statistics { + /* WEP address keys configured */ + u32 addr_key_count; + + /* default keys configured */ + u32 default_key_count; + + u32 reserved; + + /* number of times that WEP key not found on lookup */ + u32 key_not_found; + + /* number of times that WEP key decryption failed */ + u32 decrypt_fail; + + /* WEP packets decrypted */ + u32 packets; + + /* WEP decrypt interrupts */ + u32 interrupt; +} __attribute__ ((packed)); + +#define ACX_MISSED_BEACONS_SPREAD 10 + +struct acx_pwr_statistics { + /* the amount of enters into power save mode (both PD & ELP) */ + u32 ps_enter; + + /* the amount of enters into ELP mode */ + u32 elp_enter; + + /* the amount of missing beacon interrupts to the host */ + u32 missing_bcns; + + /* the amount of wake on host-access times */ + u32 wake_on_host; + + /* the amount of wake on timer-expire */ + u32 wake_on_timer_exp; + + /* the number of packets that were transmitted with PS bit set */ + u32 tx_with_ps; + + /* the number of packets that were transmitted with PS bit clear */ + u32 tx_without_ps; + + /* the number of received beacons */ + u32 rcvd_beacons; + + /* the number of entering into PowerOn (power save off) */ + u32 power_save_off; + + /* the number of entries into power save mode */ + u16 enable_ps; + + /* + * the number of exits from power save, not including failed PS + * transitions + */ + u16 disable_ps; + + /* + * the number of times the TSF counter was adjusted because + * of drift + */ + u32 fix_tsf_ps; + + /* Gives statistics about the spread continuous missed beacons. + * The 16 LSB are dedicated for the PS mode. + * The 16 MSB are dedicated for the PS mode. + * cont_miss_bcns_spread[0] - single missed beacon. + * cont_miss_bcns_spread[1] - two continuous missed beacons. + * cont_miss_bcns_spread[2] - three continuous missed beacons. + * ... + * cont_miss_bcns_spread[9] - ten and more continuous missed beacons. + */ + u32 cont_miss_bcns_spread[ACX_MISSED_BEACONS_SPREAD]; + + /* the number of beacons in awake mode */ + u32 rcvd_awake_beacons; +} __attribute__ ((packed)); + +struct acx_mic_statistics { + u32 rx_pkts; + u32 calc_failure; +} __attribute__ ((packed)); + +struct acx_aes_statistics { + u32 encrypt_fail; + u32 decrypt_fail; + u32 encrypt_packets; + u32 decrypt_packets; + u32 encrypt_interrupt; + u32 decrypt_interrupt; +} __attribute__ ((packed)); + +struct acx_event_statistics { + u32 heart_beat; + u32 calibration; + u32 rx_mismatch; + u32 rx_mem_empty; + u32 rx_pool; + u32 oom_late; + u32 phy_transmit_error; + u32 tx_stuck; +} __attribute__ ((packed)); + +struct acx_ps_statistics { + u32 pspoll_timeouts; + u32 upsd_timeouts; + u32 upsd_max_sptime; + u32 upsd_max_apturn; + u32 pspoll_max_apturn; + u32 pspoll_utilization; + u32 upsd_utilization; +} __attribute__ ((packed)); + +struct acx_rxpipe_statistics { + u32 rx_prep_beacon_drop; + u32 descr_host_int_trig_rx_data; + u32 beacon_buffer_thres_host_int_trig_rx_data; + u32 missed_beacon_host_int_trig_rx_data; + u32 tx_xfr_host_int_trig_rx_data; +} __attribute__ ((packed)); + +struct acx_statistics { + struct acx_header header; + + struct acx_tx_statistics tx; + struct acx_rx_statistics rx; + struct acx_dma_statistics dma; + struct acx_isr_statistics isr; + struct acx_wep_statistics wep; + struct acx_pwr_statistics pwr; + struct acx_aes_statistics aes; + struct acx_mic_statistics mic; + struct acx_event_statistics event; + struct acx_ps_statistics ps; + struct acx_rxpipe_statistics rxpipe; +} __attribute__ ((packed)); + +enum { + ACX_WAKE_UP_CONDITIONS = 0x0002, + ACX_MEM_CFG = 0x0003, + ACX_SLOT = 0x0004, + ACX_QUEUE_HEAD = 0x0005, /* for MASTER mode only */ + ACX_AC_CFG = 0x0007, + ACX_MEM_MAP = 0x0008, + ACX_AID = 0x000A, + ACX_RADIO_PARAM = 0x000B, /* Not used */ + ACX_CFG = 0x000C, /* Not used */ + ACX_FW_REV = 0x000D, + ACX_MEDIUM_USAGE = 0x000F, + ACX_RX_CFG = 0x0010, + ACX_TX_QUEUE_CFG = 0x0011, /* FIXME: only used by wl1251 */ + ACX_BSS_IN_PS = 0x0012, /* for AP only */ + ACX_STATISTICS = 0x0013, /* Debug API */ + ACX_FEATURE_CFG = 0x0015, + ACX_MISC_CFG = 0x0017, /* Not used */ + ACX_TID_CFG = 0x001A, + ACX_BEACON_FILTER_OPT = 0x001F, + ACX_LOW_RSSI = 0x0020, + ACX_NOISE_HIST = 0x0021, + ACX_HDK_VERSION = 0x0022, /* ??? */ + ACX_PD_THRESHOLD = 0x0023, + ACX_DATA_PATH_PARAMS = 0x0024, /* WO */ + ACX_DATA_PATH_RESP_PARAMS = 0x0024, /* RO */ + ACX_CCA_THRESHOLD = 0x0025, + ACX_EVENT_MBOX_MASK = 0x0026, +#ifdef FW_RUNNING_AS_AP + ACX_DTIM_PERIOD = 0x0027, /* for AP only */ +#else + ACX_WR_TBTT_AND_DTIM = 0x0027, /* STA only */ +#endif + ACX_ACI_OPTION_CFG = 0x0029, /* OBSOLETE (for 1251)*/ + ACX_GPIO_CFG = 0x002A, /* Not used */ + ACX_GPIO_SET = 0x002B, /* Not used */ + ACX_PM_CFG = 0x002C, /* To Be Documented */ + ACX_CONN_MONIT_PARAMS = 0x002D, + ACX_AVERAGE_RSSI = 0x002E, /* Not used */ + ACX_CONS_TX_FAILURE = 0x002F, + ACX_BCN_DTIM_OPTIONS = 0x0031, + ACX_SG_ENABLE = 0x0032, + ACX_SG_CFG = 0x0033, + ACX_ANTENNA_DIVERSITY_CFG = 0x0035, /* To Be Documented */ + ACX_LOW_SNR = 0x0037, /* To Be Documented */ + ACX_BEACON_FILTER_TABLE = 0x0038, + ACX_ARP_IP_FILTER = 0x0039, + ACX_ROAMING_STATISTICS_TBL = 0x003B, + ACX_RATE_POLICY = 0x003D, + ACX_CTS_PROTECTION = 0x003E, + ACX_SLEEP_AUTH = 0x003F, + ACX_PREAMBLE_TYPE = 0x0040, + ACX_ERROR_CNT = 0x0041, + ACX_FW_GEN_FRAME_RATES = 0x0042, + ACX_IBSS_FILTER = 0x0044, + ACX_SERVICE_PERIOD_TIMEOUT = 0x0045, + ACX_TSF_INFO = 0x0046, + ACX_CONFIG_PS_WMM = 0x0049, + ACX_ENABLE_RX_DATA_FILTER = 0x004A, + ACX_SET_RX_DATA_FILTER = 0x004B, + ACX_GET_DATA_FILTER_STATISTICS = 0x004C, + ACX_POWER_LEVEL_TABLE = 0x004D, + ACX_BET_ENABLE = 0x0050, + DOT11_STATION_ID = 0x1001, + DOT11_RX_MSDU_LIFE_TIME = 0x1004, + DOT11_CUR_TX_PWR = 0x100D, + DOT11_DEFAULT_KEY = 0x1010, + DOT11_RX_DOT11_MODE = 0x1012, + DOT11_RTS_THRESHOLD = 0x1013, + DOT11_GROUP_ADDRESS_TBL = 0x1014, + + MAX_DOT11_IE = DOT11_GROUP_ADDRESS_TBL, + + MAX_IE = 0xFFFF +}; + + +int wl12xx_acx_frame_rates(struct wl12xx *wl, u8 ctrl_rate, u8 ctrl_mod, + u8 mgt_rate, u8 mgt_mod); +int wl12xx_acx_station_id(struct wl12xx *wl); +int wl12xx_acx_default_key(struct wl12xx *wl, u8 key_id); +int wl12xx_acx_wake_up_conditions(struct wl12xx *wl, u8 listen_interval); +int wl12xx_acx_sleep_auth(struct wl12xx *wl, u8 sleep_auth); +int wl12xx_acx_fw_version(struct wl12xx *wl, char *buf, size_t len); +int wl12xx_acx_tx_power(struct wl12xx *wl, int power); +int wl12xx_acx_feature_cfg(struct wl12xx *wl); +int wl12xx_acx_mem_map(struct wl12xx *wl, void *mem_map, size_t len); +int wl12xx_acx_data_path_params(struct wl12xx *wl, + struct acx_data_path_params_resp *data_path); +int wl12xx_acx_rx_msdu_life_time(struct wl12xx *wl, u32 life_time); +int wl12xx_acx_rx_config(struct wl12xx *wl, u32 config, u32 filter); +int wl12xx_acx_pd_threshold(struct wl12xx *wl); +int wl12xx_acx_slot(struct wl12xx *wl, enum acx_slot_type slot_time); +int wl12xx_acx_group_address_tbl(struct wl12xx *wl); +int wl12xx_acx_service_period_timeout(struct wl12xx *wl); +int wl12xx_acx_rts_threshold(struct wl12xx *wl, u16 rts_threshold); +int wl12xx_acx_beacon_filter_opt(struct wl12xx *wl); +int wl12xx_acx_beacon_filter_table(struct wl12xx *wl); +int wl12xx_acx_sg_enable(struct wl12xx *wl); +int wl12xx_acx_sg_cfg(struct wl12xx *wl); +int wl12xx_acx_cca_threshold(struct wl12xx *wl); +int wl12xx_acx_bcn_dtim_options(struct wl12xx *wl); +int wl12xx_acx_aid(struct wl12xx *wl, u16 aid); +int wl12xx_acx_event_mbox_mask(struct wl12xx *wl, u32 event_mask); +int wl12xx_acx_set_preamble(struct wl12xx *wl, enum acx_preamble_type preamble); +int wl12xx_acx_cts_protect(struct wl12xx *wl, + enum acx_ctsprotect_type ctsprotect); +int wl12xx_acx_statistics(struct wl12xx *wl, struct acx_statistics *stats); + +#endif /* __WL12XX_ACX_H__ */ diff --git a/drivers/net/wireless/wl12xx/boot.c b/drivers/net/wireless/wl12xx/boot.c new file mode 100644 index 00000000000..48ac08c429b --- /dev/null +++ b/drivers/net/wireless/wl12xx/boot.c @@ -0,0 +1,295 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include + +#include "reg.h" +#include "boot.h" +#include "spi.h" +#include "event.h" + +static void wl12xx_boot_enable_interrupts(struct wl12xx *wl) +{ + enable_irq(wl->irq); +} + +void wl12xx_boot_target_enable_interrupts(struct wl12xx *wl) +{ + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, ~(wl->intr_mask)); + wl12xx_reg_write32(wl, HI_CFG, HI_CFG_DEF_VAL); +} + +int wl12xx_boot_soft_reset(struct wl12xx *wl) +{ + unsigned long timeout; + u32 boot_data; + + /* perform soft reset */ + wl12xx_reg_write32(wl, ACX_REG_SLV_SOFT_RESET, ACX_SLV_SOFT_RESET_BIT); + + /* SOFT_RESET is self clearing */ + timeout = jiffies + usecs_to_jiffies(SOFT_RESET_MAX_TIME); + while (1) { + boot_data = wl12xx_reg_read32(wl, ACX_REG_SLV_SOFT_RESET); + wl12xx_debug(DEBUG_BOOT, "soft reset bootdata 0x%x", boot_data); + if ((boot_data & ACX_SLV_SOFT_RESET_BIT) == 0) + break; + + if (time_after(jiffies, timeout)) { + /* 1.2 check pWhalBus->uSelfClearTime if the + * timeout was reached */ + wl12xx_error("soft reset timeout"); + return -1; + } + + udelay(SOFT_RESET_STALL_TIME); + } + + /* disable Rx/Tx */ + wl12xx_reg_write32(wl, ENABLE, 0x0); + + /* disable auto calibration on start*/ + wl12xx_reg_write32(wl, SPARE_A2, 0xffff); + + return 0; +} + +int wl12xx_boot_init_seq(struct wl12xx *wl) +{ + u32 scr_pad6, init_data, tmp, elp_cmd, ref_freq; + + /* + * col #1: INTEGER_DIVIDER + * col #2: FRACTIONAL_DIVIDER + * col #3: ATTN_BB + * col #4: ALPHA_BB + * col #5: STOP_TIME_BB + * col #6: BB_PLL_LOOP_FILTER + */ + static const u32 LUT[REF_FREQ_NUM][LUT_PARAM_NUM] = { + + { 83, 87381, 0xB, 5, 0xF00, 3}, /* REF_FREQ_19_2*/ + { 61, 141154, 0xB, 5, 0x1450, 2}, /* REF_FREQ_26_0*/ + { 41, 174763, 0xC, 6, 0x2D00, 1}, /* REF_FREQ_38_4*/ + { 40, 0, 0xC, 6, 0x2EE0, 1}, /* REF_FREQ_40_0*/ + { 47, 162280, 0xC, 6, 0x2760, 1} /* REF_FREQ_33_6 */ + }; + + /* read NVS params */ + scr_pad6 = wl12xx_reg_read32(wl, SCR_PAD6); + wl12xx_debug(DEBUG_BOOT, "scr_pad6 0x%x", scr_pad6); + + /* read ELP_CMD */ + elp_cmd = wl12xx_reg_read32(wl, ELP_CMD); + wl12xx_debug(DEBUG_BOOT, "elp_cmd 0x%x", elp_cmd); + + /* set the BB calibration time to be 300 usec (PLL_CAL_TIME) */ + ref_freq = scr_pad6 & 0x000000FF; + wl12xx_debug(DEBUG_BOOT, "ref_freq 0x%x", ref_freq); + + wl12xx_reg_write32(wl, PLL_CAL_TIME, 0x9); + + /* + * PG 1.2: set the clock buffer time to be 210 usec (CLK_BUF_TIME) + */ + wl12xx_reg_write32(wl, CLK_BUF_TIME, 0x6); + + /* + * set the clock detect feature to work in the restart wu procedure + * (ELP_CFG_MODE[14]) and Select the clock source type + * (ELP_CFG_MODE[13:12]) + */ + tmp = ((scr_pad6 & 0x0000FF00) << 4) | 0x00004000; + wl12xx_reg_write32(wl, ELP_CFG_MODE, tmp); + + /* PG 1.2: enable the BB PLL fix. Enable the PLL_LIMP_CLK_EN_CMD */ + elp_cmd |= 0x00000040; + wl12xx_reg_write32(wl, ELP_CMD, elp_cmd); + + /* PG 1.2: Set the BB PLL stable time to be 1000usec + * (PLL_STABLE_TIME) */ + wl12xx_reg_write32(wl, CFG_PLL_SYNC_CNT, 0x20); + + /* PG 1.2: read clock request time */ + init_data = wl12xx_reg_read32(wl, CLK_REQ_TIME); + + /* + * PG 1.2: set the clock request time to be ref_clk_settling_time - + * 1ms = 4ms + */ + if (init_data > 0x21) + tmp = init_data - 0x21; + else + tmp = 0; + wl12xx_reg_write32(wl, CLK_REQ_TIME, tmp); + + /* set BB PLL configurations in RF AFE */ + wl12xx_reg_write32(wl, 0x003058cc, 0x4B5); + + /* set RF_AFE_REG_5 */ + wl12xx_reg_write32(wl, 0x003058d4, 0x50); + + /* set RF_AFE_CTRL_REG_2 */ + wl12xx_reg_write32(wl, 0x00305948, 0x11c001); + + /* + * change RF PLL and BB PLL divider for VCO clock and adjust VCO + * bais current(RF_AFE_REG_13) + */ + wl12xx_reg_write32(wl, 0x003058f4, 0x1e); + + /* set BB PLL configurations */ + tmp = LUT[ref_freq][LUT_PARAM_INTEGER_DIVIDER] | 0x00017000; + wl12xx_reg_write32(wl, 0x00305840, tmp); + + /* set fractional divider according to Appendix C-BB PLL + * Calculations + */ + tmp = LUT[ref_freq][LUT_PARAM_FRACTIONAL_DIVIDER]; + wl12xx_reg_write32(wl, 0x00305844, tmp); + + /* set the initial data for the sigma delta */ + wl12xx_reg_write32(wl, 0x00305848, 0x3039); + + /* + * set the accumulator attenuation value, calibration loop1 + * (alpha), calibration loop2 (beta), calibration loop3 (gamma) and + * the VCO gain + */ + tmp = (LUT[ref_freq][LUT_PARAM_ATTN_BB] << 16) | + (LUT[ref_freq][LUT_PARAM_ALPHA_BB] << 12) | 0x1; + wl12xx_reg_write32(wl, 0x00305854, tmp); + + /* + * set the calibration stop time after holdoff time expires and set + * settling time HOLD_OFF_TIME_BB + */ + tmp = LUT[ref_freq][LUT_PARAM_STOP_TIME_BB] | 0x000A0000; + wl12xx_reg_write32(wl, 0x00305858, tmp); + + /* + * set BB PLL Loop filter capacitor3- BB_C3[2:0] and set BB PLL + * constant leakage current to linearize PFD to 0uA - + * BB_ILOOPF[7:3] + */ + tmp = LUT[ref_freq][LUT_PARAM_BB_PLL_LOOP_FILTER] | 0x00000030; + wl12xx_reg_write32(wl, 0x003058f8, tmp); + + /* + * set regulator output voltage for n divider to + * 1.35-BB_REFDIV[1:0], set charge pump current- BB_CPGAIN[4:2], + * set BB PLL Loop filter capacitor2- BB_C2[7:5], set gain of BB + * PLL auto-call to normal mode- BB_CALGAIN_3DB[8] + */ + wl12xx_reg_write32(wl, 0x003058f0, 0x29); + + /* enable restart wakeup sequence (ELP_CMD[0]) */ + wl12xx_reg_write32(wl, ELP_CMD, elp_cmd | 0x1); + + /* restart sequence completed */ + udelay(2000); + + return 0; +} + +int wl12xx_boot_run_firmware(struct wl12xx *wl) +{ + int loop, ret; + u32 chip_id, interrupt; + + wl->chip.op_set_ecpu_ctrl(wl, ECPU_CONTROL_HALT); + + chip_id = wl12xx_reg_read32(wl, CHIP_ID_B); + + wl12xx_debug(DEBUG_BOOT, "chip id after firmware boot: 0x%x", chip_id); + + if (chip_id != wl->chip.id) { + wl12xx_error("chip id doesn't match after firmware boot"); + return -EIO; + } + + /* wait for init to complete */ + loop = 0; + while (loop++ < INIT_LOOP) { + udelay(INIT_LOOP_DELAY); + interrupt = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR); + + if (interrupt == 0xffffffff) { + wl12xx_error("error reading hardware complete " + "init indication"); + return -EIO; + } + /* check that ACX_INTR_INIT_COMPLETE is enabled */ + else if (interrupt & wl->chip.intr_init_complete) { + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_ACK, + wl->chip.intr_init_complete); + break; + } + } + + if (loop >= INIT_LOOP) { + wl12xx_error("timeout waiting for the hardware to " + "complete initialization"); + return -EIO; + } + + /* get hardware config command mail box */ + wl->cmd_box_addr = wl12xx_reg_read32(wl, REG_COMMAND_MAILBOX_PTR); + + /* get hardware config event mail box */ + wl->event_box_addr = wl12xx_reg_read32(wl, REG_EVENT_MAILBOX_PTR); + + /* set the working partition to its "running" mode offset */ + wl12xx_set_partition(wl, + wl->chip.p_table[PART_WORK].mem.start, + wl->chip.p_table[PART_WORK].mem.size, + wl->chip.p_table[PART_WORK].reg.start, + wl->chip.p_table[PART_WORK].reg.size); + + wl12xx_debug(DEBUG_MAILBOX, "cmd_box_addr 0x%x event_box_addr 0x%x", + wl->cmd_box_addr, wl->event_box_addr); + + /* + * in case of full asynchronous mode the firmware event must be + * ready to receive event from the command mailbox + */ + + /* enable gpio interrupts */ + wl12xx_boot_enable_interrupts(wl); + + wl->chip.op_target_enable_interrupts(wl); + + /* unmask all mbox events */ + wl->event_mask = 0xffffffff; + + ret = wl12xx_event_unmask(wl); + if (ret < 0) { + wl12xx_error("EVENT mask setting failed"); + return ret; + } + + wl12xx_event_mbox_config(wl); + + /* firmware startup completed */ + return 0; +} diff --git a/drivers/net/wireless/wl12xx/boot.h b/drivers/net/wireless/wl12xx/boot.h new file mode 100644 index 00000000000..4fa73132baa --- /dev/null +++ b/drivers/net/wireless/wl12xx/boot.h @@ -0,0 +1,40 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __BOOT_H__ +#define __BOOT_H__ + +#include "wl12xx.h" + +int wl12xx_boot_soft_reset(struct wl12xx *wl); +int wl12xx_boot_init_seq(struct wl12xx *wl); +int wl12xx_boot_run_firmware(struct wl12xx *wl); +void wl12xx_boot_target_enable_interrupts(struct wl12xx *wl); + +/* number of times we try to read the INIT interrupt */ +#define INIT_LOOP 20000 + +/* delay between retries */ +#define INIT_LOOP_DELAY 50 + +#endif diff --git a/drivers/net/wireless/wl12xx/cmd.c b/drivers/net/wireless/wl12xx/cmd.c new file mode 100644 index 00000000000..f73ab602b7a --- /dev/null +++ b/drivers/net/wireless/wl12xx/cmd.c @@ -0,0 +1,353 @@ +#include "cmd.h" + +#include +#include +#include + +#include "wl12xx.h" +#include "wl12xx_80211.h" +#include "reg.h" +#include "spi.h" +#include "ps.h" + +int wl12xx_cmd_send(struct wl12xx *wl, u16 type, void *buf, size_t buf_len) +{ + struct wl12xx_command cmd; + unsigned long timeout; + size_t cmd_len; + u32 intr; + int ret = 0; + + memset(&cmd, 0, sizeof(cmd)); + cmd.id = type; + cmd.status = 0; + memcpy(cmd.parameters, buf, buf_len); + cmd_len = ALIGN(buf_len, 4) + CMDMBOX_HEADER_LEN; + + wl12xx_ps_elp_wakeup(wl); + + wl12xx_spi_mem_write(wl, wl->cmd_box_addr, &cmd, cmd_len); + + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_TRIG, INTR_TRIG_CMD); + + timeout = jiffies + msecs_to_jiffies(WL12XX_COMMAND_TIMEOUT); + + intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR); + while (!(intr & wl->chip.intr_cmd_complete)) { + if (time_after(jiffies, timeout)) { + wl12xx_error("command complete timeout"); + ret = -ETIMEDOUT; + goto out; + } + + msleep(1); + + intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR); + } + + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_ACK, + wl->chip.intr_cmd_complete); + +out: + wl12xx_ps_elp_sleep(wl); + + return ret; +} + +int wl12xx_cmd_test(struct wl12xx *wl, void *buf, size_t buf_len, u8 answer) +{ + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd test"); + + ret = wl12xx_cmd_send(wl, CMD_TEST, buf, buf_len); + if (ret < 0) { + wl12xx_warning("TEST command failed"); + return ret; + } + + if (answer) { + struct wl12xx_command *cmd_answer; + + /* + * The test command got in, we can read the answer. + * The answer would be a wl12xx_command, where the + * parameter array contains the actual answer. + */ + + wl12xx_ps_elp_wakeup(wl); + + wl12xx_spi_mem_read(wl, wl->cmd_box_addr, buf, buf_len); + + wl12xx_ps_elp_sleep(wl); + + cmd_answer = buf; + if (cmd_answer->status != CMD_STATUS_SUCCESS) + wl12xx_error("TEST command answer error: %d", + cmd_answer->status); + } + + return 0; +} + + +int wl12xx_cmd_interrogate(struct wl12xx *wl, u16 ie_id, u16 ie_len, + void *answer) +{ + struct wl12xx_command *cmd; + struct acx_header header; + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd interrogate"); + + header.id = ie_id; + header.len = ie_len - sizeof(header); + + ret = wl12xx_cmd_send(wl, CMD_INTERROGATE, &header, sizeof(header)); + if (ret < 0) { + wl12xx_error("INTERROGATE command failed"); + return ret; + } + + wl12xx_ps_elp_wakeup(wl); + + /* the interrogate command got in, we can read the answer */ + wl12xx_spi_mem_read(wl, wl->cmd_box_addr, answer, + CMDMBOX_HEADER_LEN + ie_len); + + wl12xx_ps_elp_sleep(wl); + + cmd = answer; + if (cmd->status != CMD_STATUS_SUCCESS) + wl12xx_error("INTERROGATE command error: %d", + cmd->status); + + return 0; + +} + +int wl12xx_cmd_configure(struct wl12xx *wl, void *ie, int ie_len) +{ + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd configure"); + + ret = wl12xx_cmd_send(wl, CMD_CONFIGURE, ie, + ie_len); + if (ret < 0) { + wl12xx_warning("CONFIGURE command NOK"); + return ret; + } + + return 0; + +} + +int wl12xx_cmd_vbm(struct wl12xx *wl, u8 identity, + void *bitmap, u16 bitmap_len, u8 bitmap_control) +{ + struct vbm_update_request vbm; + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd vbm"); + + /* Count and period will be filled by the target */ + vbm.tim.bitmap_ctrl = bitmap_control; + if (bitmap_len > PARTIAL_VBM_MAX) { + wl12xx_warning("cmd vbm len is %d B, truncating to %d", + bitmap_len, PARTIAL_VBM_MAX); + bitmap_len = PARTIAL_VBM_MAX; + } + memcpy(vbm.tim.pvb_field, bitmap, bitmap_len); + vbm.tim.identity = identity; + vbm.tim.length = bitmap_len + 3; + + vbm.len = cpu_to_le16(bitmap_len + 5); + + ret = wl12xx_cmd_send(wl, CMD_VBM, &vbm, sizeof(vbm)); + if (ret < 0) { + wl12xx_error("VBM command failed"); + return ret; + } + + return 0; +} + +int wl12xx_cmd_data_path(struct wl12xx *wl, u8 channel, u8 enable) +{ + int ret; + u16 cmd_rx, cmd_tx; + + wl12xx_debug(DEBUG_CMD, "cmd data path"); + + if (enable) { + cmd_rx = CMD_ENABLE_RX; + cmd_tx = CMD_ENABLE_TX; + } else { + cmd_rx = CMD_DISABLE_RX; + cmd_tx = CMD_DISABLE_TX; + } + + ret = wl12xx_cmd_send(wl, cmd_rx, &channel, sizeof(channel)); + if (ret < 0) { + wl12xx_error("rx %s cmd for channel %d failed", + enable ? "start" : "stop", channel); + return ret; + } + + wl12xx_debug(DEBUG_BOOT, "rx %s cmd channel %d", + enable ? "start" : "stop", channel); + + ret = wl12xx_cmd_send(wl, cmd_tx, &channel, sizeof(channel)); + if (ret < 0) { + wl12xx_error("tx %s cmd for channel %d failed", + enable ? "start" : "stop", channel); + return ret; + } + + wl12xx_debug(DEBUG_BOOT, "tx %s cmd channel %d", + enable ? "start" : "stop", channel); + + return 0; +} + +int wl12xx_cmd_join(struct wl12xx *wl, u8 bss_type, u8 dtim_interval, + u16 beacon_interval, u8 wait) +{ + unsigned long timeout; + struct cmd_join join = {}; + int ret, i; + u8 *bssid; + + /* FIXME: this should be in main.c */ + ret = wl12xx_acx_frame_rates(wl, DEFAULT_HW_GEN_TX_RATE, + DEFAULT_HW_GEN_MODULATION_TYPE, + wl->tx_mgmt_frm_rate, + wl->tx_mgmt_frm_mod); + if (ret < 0) + return ret; + + wl12xx_debug(DEBUG_CMD, "cmd join"); + + /* Reverse order BSSID */ + bssid = (u8 *)&join.bssid_lsb; + for (i = 0; i < ETH_ALEN; i++) + bssid[i] = wl->bssid[ETH_ALEN - i - 1]; + + join.rx_config_options = wl->rx_config; + join.rx_filter_options = wl->rx_filter; + + join.basic_rate_set = RATE_MASK_1MBPS | RATE_MASK_2MBPS | + RATE_MASK_5_5MBPS | RATE_MASK_11MBPS; + + join.beacon_interval = beacon_interval; + join.dtim_interval = dtim_interval; + join.bss_type = bss_type; + join.channel = wl->channel; + join.ctrl = JOIN_CMD_CTRL_TX_FLUSH; + + ret = wl12xx_cmd_send(wl, CMD_START_JOIN, &join, sizeof(join)); + if (ret < 0) { + wl12xx_error("failed to initiate cmd join"); + return ret; + } + + timeout = msecs_to_jiffies(JOIN_TIMEOUT); + + /* + * ugly hack: we should wait for JOIN_EVENT_COMPLETE_ID but to + * simplify locking we just sleep instead, for now + */ + if (wait) + msleep(10); + + return 0; +} + +int wl12xx_cmd_ps_mode(struct wl12xx *wl, u8 ps_mode) +{ + int ret; + struct acx_ps_params ps_params; + + /* FIXME: this should be in ps.c */ + ret = wl12xx_acx_wake_up_conditions(wl, wl->listen_int); + if (ret < 0) { + wl12xx_error("Couldnt set wake up conditions"); + return ret; + } + + wl12xx_debug(DEBUG_CMD, "cmd set ps mode"); + + ps_params.ps_mode = ps_mode; + ps_params.send_null_data = 1; + ps_params.retries = 5; + ps_params.hang_over_period = 128; + ps_params.null_data_rate = 1; /* 1 Mbps */ + + ret = wl12xx_cmd_send(wl, CMD_SET_PS_MODE, &ps_params, + sizeof(ps_params)); + if (ret < 0) { + wl12xx_error("cmd set_ps_mode failed"); + return ret; + } + + return 0; +} + +int wl12xx_cmd_read_memory(struct wl12xx *wl, u32 addr, u32 len, void *answer) +{ + struct cmd_read_write_memory mem_cmd, *mem_answer; + struct wl12xx_command cmd; + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd read memory"); + + memset(&mem_cmd, 0, sizeof(mem_cmd)); + mem_cmd.addr = addr; + mem_cmd.size = len; + + ret = wl12xx_cmd_send(wl, CMD_READ_MEMORY, &mem_cmd, sizeof(mem_cmd)); + if (ret < 0) { + wl12xx_error("read memory command failed: %d", ret); + return ret; + } + + /* the read command got in, we can now read the answer */ + wl12xx_spi_mem_read(wl, wl->cmd_box_addr, &cmd, + CMDMBOX_HEADER_LEN + sizeof(mem_cmd)); + + if (cmd.status != CMD_STATUS_SUCCESS) + wl12xx_error("error in read command result: %d", cmd.status); + + mem_answer = (struct cmd_read_write_memory *) cmd.parameters; + memcpy(answer, mem_answer->value, len); + + return 0; +} + +int wl12xx_cmd_template_set(struct wl12xx *wl, u16 cmd_id, + void *buf, size_t buf_len) +{ + struct wl12xx_cmd_packet_template template; + int ret; + + wl12xx_debug(DEBUG_CMD, "cmd template %d", cmd_id); + + memset(&template, 0, sizeof(template)); + + WARN_ON(buf_len > WL12XX_MAX_TEMPLATE_SIZE); + buf_len = min_t(size_t, buf_len, WL12XX_MAX_TEMPLATE_SIZE); + template.size = cpu_to_le16(buf_len); + + if (buf) + memcpy(template.template, buf, buf_len); + + ret = wl12xx_cmd_send(wl, cmd_id, &template, + sizeof(template.size) + buf_len); + if (ret < 0) { + wl12xx_warning("cmd set_template failed: %d", ret); + return ret; + } + + return 0; +} diff --git a/drivers/net/wireless/wl12xx/cmd.h b/drivers/net/wireless/wl12xx/cmd.h new file mode 100644 index 00000000000..aa307dcd081 --- /dev/null +++ b/drivers/net/wireless/wl12xx/cmd.h @@ -0,0 +1,265 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_CMD_H__ +#define __WL12XX_CMD_H__ + +#include "wl12xx.h" + +int wl12xx_cmd_send(struct wl12xx *wl, u16 type, void *buf, size_t buf_len); +int wl12xx_cmd_test(struct wl12xx *wl, void *buf, size_t buf_len, u8 answer); +int wl12xx_cmd_interrogate(struct wl12xx *wl, u16 ie_id, u16 ie_len, + void *answer); +int wl12xx_cmd_configure(struct wl12xx *wl, void *ie, int ie_len); +int wl12xx_cmd_vbm(struct wl12xx *wl, u8 identity, + void *bitmap, u16 bitmap_len, u8 bitmap_control); +int wl12xx_cmd_data_path(struct wl12xx *wl, u8 channel, u8 enable); +int wl12xx_cmd_join(struct wl12xx *wl, u8 bss_type, u8 dtim_interval, + u16 beacon_interval, u8 wait); +int wl12xx_cmd_ps_mode(struct wl12xx *wl, u8 ps_mode); +int wl12xx_cmd_read_memory(struct wl12xx *wl, u32 addr, u32 len, void *answer); +int wl12xx_cmd_template_set(struct wl12xx *wl, u16 cmd_id, + void *buf, size_t buf_len); + +/* unit ms */ +#define WL12XX_COMMAND_TIMEOUT 2000 + +#define WL12XX_MAX_TEMPLATE_SIZE 300 + +struct wl12xx_cmd_packet_template { + __le16 size; + u8 template[WL12XX_MAX_TEMPLATE_SIZE]; +} __attribute__ ((packed)); + +enum wl12xx_commands { + CMD_RESET = 0, + CMD_INTERROGATE = 1, /*use this to read information elements*/ + CMD_CONFIGURE = 2, /*use this to write information elements*/ + CMD_ENABLE_RX = 3, + CMD_ENABLE_TX = 4, + CMD_DISABLE_RX = 5, + CMD_DISABLE_TX = 6, + CMD_SCAN = 8, + CMD_STOP_SCAN = 9, + CMD_VBM = 10, + CMD_START_JOIN = 11, + CMD_SET_KEYS = 12, + CMD_READ_MEMORY = 13, + CMD_WRITE_MEMORY = 14, + CMD_BEACON = 19, + CMD_PROBE_RESP = 20, + CMD_NULL_DATA = 21, + CMD_PROBE_REQ = 22, + CMD_TEST = 23, + CMD_RADIO_CALIBRATE = 25, /* OBSOLETE */ + CMD_ENABLE_RX_PATH = 27, /* OBSOLETE */ + CMD_NOISE_HIST = 28, + CMD_RX_RESET = 29, + CMD_PS_POLL = 30, + CMD_QOS_NULL_DATA = 31, + CMD_LNA_CONTROL = 32, + CMD_SET_BCN_MODE = 33, + CMD_MEASUREMENT = 34, + CMD_STOP_MEASUREMENT = 35, + CMD_DISCONNECT = 36, + CMD_SET_PS_MODE = 37, + CMD_CHANNEL_SWITCH = 38, + CMD_STOP_CHANNEL_SWICTH = 39, + CMD_AP_DISCOVERY = 40, + CMD_STOP_AP_DISCOVERY = 41, + CMD_SPS_SCAN = 42, + CMD_STOP_SPS_SCAN = 43, + CMD_HEALTH_CHECK = 45, + CMD_DEBUG = 46, + CMD_TRIGGER_SCAN_TO = 47, + + NUM_COMMANDS, + MAX_COMMAND_ID = 0xFFFF, +}; + +#define MAX_CMD_PARAMS 572 + +struct wl12xx_command { + u16 id; + u16 status; + u8 parameters[MAX_CMD_PARAMS]; +}; + +enum { + CMD_MAILBOX_IDLE = 0, + CMD_STATUS_SUCCESS = 1, + CMD_STATUS_UNKNOWN_CMD = 2, + CMD_STATUS_UNKNOWN_IE = 3, + CMD_STATUS_REJECT_MEAS_SG_ACTIVE = 11, + CMD_STATUS_RX_BUSY = 13, + CMD_STATUS_INVALID_PARAM = 14, + CMD_STATUS_TEMPLATE_TOO_LARGE = 15, + CMD_STATUS_OUT_OF_MEMORY = 16, + CMD_STATUS_STA_TABLE_FULL = 17, + CMD_STATUS_RADIO_ERROR = 18, + CMD_STATUS_WRONG_NESTING = 19, + CMD_STATUS_TIMEOUT = 21, /* Driver internal use.*/ + CMD_STATUS_FW_RESET = 22, /* Driver internal use.*/ + MAX_COMMAND_STATUS = 0xff +}; + + +/* + * CMD_READ_MEMORY + * + * The host issues this command to read the WiLink device memory/registers. + * + * Note: The Base Band address has special handling (16 bits registers and + * addresses). For more information, see the hardware specification. + */ +/* + * CMD_WRITE_MEMORY + * + * The host issues this command to write the WiLink device memory/registers. + * + * The Base Band address has special handling (16 bits registers and + * addresses). For more information, see the hardware specification. + */ +#define MAX_READ_SIZE 256 + +struct cmd_read_write_memory { + /* The address of the memory to read from or write to.*/ + u32 addr; + + /* The amount of data in bytes to read from or write to the WiLink + * device.*/ + u32 size; + + /* The actual value read from or written to the Wilink. The source + of this field is the Host in WRITE command or the Wilink in READ + command. */ + u8 value[MAX_READ_SIZE]; +}; + +#define CMDMBOX_HEADER_LEN 4 +#define CMDMBOX_INFO_ELEM_HEADER_LEN 4 + + +struct basic_scan_parameters { + u32 rx_config_options; + u32 rx_filter_options; + + /* + * Scan options: + * bit 0: When this bit is set, passive scan. + * bit 1: Band, when this bit is set we scan + * in the 5Ghz band. + * bit 2: voice mode, 0 for normal scan. + * bit 3: scan priority, 1 for high priority. + */ + u16 scan_options; + + /* Number of channels to scan */ + u8 num_channels; + + /* Number opf probe requests to send, per channel */ + u8 num_probe_requests; + + /* Rate and modulation for probe requests */ + u16 tx_rate; + + u8 tid_trigger; + u8 ssid_len; + u32 ssid[8]; + +} __attribute__ ((packed)); + +struct basic_scan_channel_parameters { + u32 min_duration; /* in TU */ + u32 max_duration; /* in TU */ + u32 bssid_lsb; + u16 bssid_msb; + + /* + * bits 0-3: Early termination count. + * bits 4-5: Early termination condition. + */ + u8 early_termination; + + u8 tx_power_att; + u8 channel; + u8 pad[3]; +} __attribute__ ((packed)); + +/* SCAN parameters */ +#define SCAN_MAX_NUM_OF_CHANNELS 16 + +struct cmd_scan { + struct basic_scan_parameters params; + struct basic_scan_channel_parameters channels[SCAN_MAX_NUM_OF_CHANNELS]; +} __attribute__ ((packed)); + +enum { + BSS_TYPE_IBSS = 0, + BSS_TYPE_STA_BSS = 2, + BSS_TYPE_AP_BSS = 3, + MAX_BSS_TYPE = 0xFF +}; + +#define JOIN_CMD_CTRL_TX_FLUSH 0x80 /* Firmware flushes all Tx */ +#define JOIN_CMD_CTRL_EARLY_WAKEUP_ENABLE 0x01 /* Early wakeup time */ + + +struct cmd_join { + u32 bssid_lsb; + u16 bssid_msb; + u16 beacon_interval; /* in TBTTs */ + u32 rx_config_options; + u32 rx_filter_options; + + /* + * The target uses this field to determine the rate at + * which to transmit control frame responses (such as + * ACK or CTS frames). + */ + u16 basic_rate_set; + u8 dtim_interval; + u8 tx_ctrl_frame_rate; /* OBSOLETE */ + u8 tx_ctrl_frame_mod; /* OBSOLETE */ + /* + * bits 0-2: This bitwise field specifies the type + * of BSS to start or join (BSS_TYPE_*). + * bit 4: Band - The radio band in which to join + * or start. + * 0 - 2.4GHz band + * 1 - 5GHz band + * bits 3, 5-7: Reserved + */ + u8 bss_type; + u8 channel; + u8 ssid_len; + u8 ssid[IW_ESSID_MAX_SIZE]; + u8 ctrl; /* JOIN_CMD_CTRL_* */ + u8 tx_mgt_frame_rate; /* OBSOLETE */ + u8 tx_mgt_frame_mod; /* OBSOLETE */ + u8 reserved; +} __attribute__ ((packed)); + + +#endif /* __WL12XX_CMD_H__ */ diff --git a/drivers/net/wireless/wl12xx/debugfs.c b/drivers/net/wireless/wl12xx/debugfs.c new file mode 100644 index 00000000000..cdb368ce4da --- /dev/null +++ b/drivers/net/wireless/wl12xx/debugfs.c @@ -0,0 +1,508 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include "debugfs.h" + +#include + +#include "wl12xx.h" +#include "acx.h" + +/* ms */ +#define WL12XX_DEBUGFS_STATS_LIFETIME 1000 + +/* debugfs macros idea from mac80211 */ + +#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...) \ +static ssize_t name## _read(struct file *file, char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + struct wl12xx *wl = file->private_data; \ + char buf[buflen]; \ + int res; \ + \ + res = scnprintf(buf, buflen, fmt "\n", ##value); \ + return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ +} \ + \ +static const struct file_operations name## _ops = { \ + .read = name## _read, \ + .open = wl12xx_open_file_generic, \ +}; + +#define DEBUGFS_ADD(name, parent) \ + wl->debugfs.name = debugfs_create_file(#name, 0400, parent, \ + wl, &name## _ops); \ + if (IS_ERR(wl->debugfs.name)) { \ + ret = PTR_ERR(wl->debugfs.name); \ + wl->debugfs.name = NULL; \ + goto out; \ + } + +#define DEBUGFS_DEL(name) \ + do { \ + debugfs_remove(wl->debugfs.name); \ + wl->debugfs.name = NULL; \ + } while (0) + +#define DEBUGFS_FWSTATS_FILE(sub, name, buflen, fmt) \ +static ssize_t sub## _ ##name## _read(struct file *file, \ + char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + struct wl12xx *wl = file->private_data; \ + char buf[buflen]; \ + int res; \ + \ + wl12xx_debugfs_update_stats(wl); \ + \ + res = scnprintf(buf, buflen, fmt "\n", \ + wl->stats.fw_stats->sub.name); \ + return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ +} \ + \ +static const struct file_operations sub## _ ##name## _ops = { \ + .read = sub## _ ##name## _read, \ + .open = wl12xx_open_file_generic, \ +}; + +#define DEBUGFS_FWSTATS_ADD(sub, name) \ + DEBUGFS_ADD(sub## _ ##name, wl->debugfs.fw_statistics) + +#define DEBUGFS_FWSTATS_DEL(sub, name) \ + DEBUGFS_DEL(sub## _ ##name) + +static void wl12xx_debugfs_update_stats(struct wl12xx *wl) +{ + mutex_lock(&wl->mutex); + + if (wl->state == WL12XX_STATE_ON && + time_after(jiffies, wl->stats.fw_stats_update + + msecs_to_jiffies(WL12XX_DEBUGFS_STATS_LIFETIME))) { + wl12xx_acx_statistics(wl, wl->stats.fw_stats); + wl->stats.fw_stats_update = jiffies; + } + + mutex_unlock(&wl->mutex); +} + +static int wl12xx_open_file_generic(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +DEBUGFS_FWSTATS_FILE(tx, internal_desc_overflow, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(rx, out_of_mem, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, hdr_overflow, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, hw_stuck, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, dropped, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, fcs_err, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, xfr_hint_trig, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, path_reset, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rx, reset_counter, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(dma, rx_requested, 20, "%u"); +DEBUGFS_FWSTATS_FILE(dma, rx_errors, 20, "%u"); +DEBUGFS_FWSTATS_FILE(dma, tx_requested, 20, "%u"); +DEBUGFS_FWSTATS_FILE(dma, tx_errors, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(isr, cmd_cmplt, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, fiqs, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, rx_headers, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, rx_mem_overflow, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, rx_rdys, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, irqs, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, tx_procs, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, decrypt_done, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, dma0_done, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, dma1_done, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, tx_exch_complete, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, commands, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, rx_procs, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, hw_pm_mode_changes, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, host_acknowledges, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, pci_pm, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, wakeups, 20, "%u"); +DEBUGFS_FWSTATS_FILE(isr, low_rssi, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(wep, addr_key_count, 20, "%u"); +DEBUGFS_FWSTATS_FILE(wep, default_key_count, 20, "%u"); +/* skipping wep.reserved */ +DEBUGFS_FWSTATS_FILE(wep, key_not_found, 20, "%u"); +DEBUGFS_FWSTATS_FILE(wep, decrypt_fail, 20, "%u"); +DEBUGFS_FWSTATS_FILE(wep, packets, 20, "%u"); +DEBUGFS_FWSTATS_FILE(wep, interrupt, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(pwr, ps_enter, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, elp_enter, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, missing_bcns, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, wake_on_host, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, wake_on_timer_exp, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, tx_with_ps, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, tx_without_ps, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, rcvd_beacons, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, power_save_off, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, enable_ps, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, disable_ps, 20, "%u"); +DEBUGFS_FWSTATS_FILE(pwr, fix_tsf_ps, 20, "%u"); +/* skipping cont_miss_bcns_spread for now */ +DEBUGFS_FWSTATS_FILE(pwr, rcvd_awake_beacons, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(mic, rx_pkts, 20, "%u"); +DEBUGFS_FWSTATS_FILE(mic, calc_failure, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(aes, encrypt_fail, 20, "%u"); +DEBUGFS_FWSTATS_FILE(aes, decrypt_fail, 20, "%u"); +DEBUGFS_FWSTATS_FILE(aes, encrypt_packets, 20, "%u"); +DEBUGFS_FWSTATS_FILE(aes, decrypt_packets, 20, "%u"); +DEBUGFS_FWSTATS_FILE(aes, encrypt_interrupt, 20, "%u"); +DEBUGFS_FWSTATS_FILE(aes, decrypt_interrupt, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(event, heart_beat, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, calibration, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, rx_mismatch, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, rx_mem_empty, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, rx_pool, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, oom_late, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, phy_transmit_error, 20, "%u"); +DEBUGFS_FWSTATS_FILE(event, tx_stuck, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(ps, pspoll_timeouts, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, upsd_timeouts, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, upsd_max_sptime, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, upsd_max_apturn, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, pspoll_max_apturn, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, pspoll_utilization, 20, "%u"); +DEBUGFS_FWSTATS_FILE(ps, upsd_utilization, 20, "%u"); + +DEBUGFS_FWSTATS_FILE(rxpipe, rx_prep_beacon_drop, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rxpipe, descr_host_int_trig_rx_data, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rxpipe, beacon_buffer_thres_host_int_trig_rx_data, + 20, "%u"); +DEBUGFS_FWSTATS_FILE(rxpipe, missed_beacon_host_int_trig_rx_data, 20, "%u"); +DEBUGFS_FWSTATS_FILE(rxpipe, tx_xfr_host_int_trig_rx_data, 20, "%u"); + +DEBUGFS_READONLY_FILE(retry_count, 20, "%u", wl->stats.retry_count); +DEBUGFS_READONLY_FILE(excessive_retries, 20, "%u", + wl->stats.excessive_retries); + +static ssize_t tx_queue_len_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct wl12xx *wl = file->private_data; + u32 queue_len; + char buf[20]; + int res; + + queue_len = skb_queue_len(&wl->tx_queue); + + res = scnprintf(buf, sizeof(buf), "%u\n", queue_len); + return simple_read_from_buffer(userbuf, count, ppos, buf, res); +} + +static const struct file_operations tx_queue_len_ops = { + .read = tx_queue_len_read, + .open = wl12xx_open_file_generic, +}; + +static void wl12xx_debugfs_delete_files(struct wl12xx *wl) +{ + DEBUGFS_FWSTATS_DEL(tx, internal_desc_overflow); + + DEBUGFS_FWSTATS_DEL(rx, out_of_mem); + DEBUGFS_FWSTATS_DEL(rx, hdr_overflow); + DEBUGFS_FWSTATS_DEL(rx, hw_stuck); + DEBUGFS_FWSTATS_DEL(rx, dropped); + DEBUGFS_FWSTATS_DEL(rx, fcs_err); + DEBUGFS_FWSTATS_DEL(rx, xfr_hint_trig); + DEBUGFS_FWSTATS_DEL(rx, path_reset); + DEBUGFS_FWSTATS_DEL(rx, reset_counter); + + DEBUGFS_FWSTATS_DEL(dma, rx_requested); + DEBUGFS_FWSTATS_DEL(dma, rx_errors); + DEBUGFS_FWSTATS_DEL(dma, tx_requested); + DEBUGFS_FWSTATS_DEL(dma, tx_errors); + + DEBUGFS_FWSTATS_DEL(isr, cmd_cmplt); + DEBUGFS_FWSTATS_DEL(isr, fiqs); + DEBUGFS_FWSTATS_DEL(isr, rx_headers); + DEBUGFS_FWSTATS_DEL(isr, rx_mem_overflow); + DEBUGFS_FWSTATS_DEL(isr, rx_rdys); + DEBUGFS_FWSTATS_DEL(isr, irqs); + DEBUGFS_FWSTATS_DEL(isr, tx_procs); + DEBUGFS_FWSTATS_DEL(isr, decrypt_done); + DEBUGFS_FWSTATS_DEL(isr, dma0_done); + DEBUGFS_FWSTATS_DEL(isr, dma1_done); + DEBUGFS_FWSTATS_DEL(isr, tx_exch_complete); + DEBUGFS_FWSTATS_DEL(isr, commands); + DEBUGFS_FWSTATS_DEL(isr, rx_procs); + DEBUGFS_FWSTATS_DEL(isr, hw_pm_mode_changes); + DEBUGFS_FWSTATS_DEL(isr, host_acknowledges); + DEBUGFS_FWSTATS_DEL(isr, pci_pm); + DEBUGFS_FWSTATS_DEL(isr, wakeups); + DEBUGFS_FWSTATS_DEL(isr, low_rssi); + + DEBUGFS_FWSTATS_DEL(wep, addr_key_count); + DEBUGFS_FWSTATS_DEL(wep, default_key_count); + /* skipping wep.reserved */ + DEBUGFS_FWSTATS_DEL(wep, key_not_found); + DEBUGFS_FWSTATS_DEL(wep, decrypt_fail); + DEBUGFS_FWSTATS_DEL(wep, packets); + DEBUGFS_FWSTATS_DEL(wep, interrupt); + + DEBUGFS_FWSTATS_DEL(pwr, ps_enter); + DEBUGFS_FWSTATS_DEL(pwr, elp_enter); + DEBUGFS_FWSTATS_DEL(pwr, missing_bcns); + DEBUGFS_FWSTATS_DEL(pwr, wake_on_host); + DEBUGFS_FWSTATS_DEL(pwr, wake_on_timer_exp); + DEBUGFS_FWSTATS_DEL(pwr, tx_with_ps); + DEBUGFS_FWSTATS_DEL(pwr, tx_without_ps); + DEBUGFS_FWSTATS_DEL(pwr, rcvd_beacons); + DEBUGFS_FWSTATS_DEL(pwr, power_save_off); + DEBUGFS_FWSTATS_DEL(pwr, enable_ps); + DEBUGFS_FWSTATS_DEL(pwr, disable_ps); + DEBUGFS_FWSTATS_DEL(pwr, fix_tsf_ps); + /* skipping cont_miss_bcns_spread for now */ + DEBUGFS_FWSTATS_DEL(pwr, rcvd_awake_beacons); + + DEBUGFS_FWSTATS_DEL(mic, rx_pkts); + DEBUGFS_FWSTATS_DEL(mic, calc_failure); + + DEBUGFS_FWSTATS_DEL(aes, encrypt_fail); + DEBUGFS_FWSTATS_DEL(aes, decrypt_fail); + DEBUGFS_FWSTATS_DEL(aes, encrypt_packets); + DEBUGFS_FWSTATS_DEL(aes, decrypt_packets); + DEBUGFS_FWSTATS_DEL(aes, encrypt_interrupt); + DEBUGFS_FWSTATS_DEL(aes, decrypt_interrupt); + + DEBUGFS_FWSTATS_DEL(event, heart_beat); + DEBUGFS_FWSTATS_DEL(event, calibration); + DEBUGFS_FWSTATS_DEL(event, rx_mismatch); + DEBUGFS_FWSTATS_DEL(event, rx_mem_empty); + DEBUGFS_FWSTATS_DEL(event, rx_pool); + DEBUGFS_FWSTATS_DEL(event, oom_late); + DEBUGFS_FWSTATS_DEL(event, phy_transmit_error); + DEBUGFS_FWSTATS_DEL(event, tx_stuck); + + DEBUGFS_FWSTATS_DEL(ps, pspoll_timeouts); + DEBUGFS_FWSTATS_DEL(ps, upsd_timeouts); + DEBUGFS_FWSTATS_DEL(ps, upsd_max_sptime); + DEBUGFS_FWSTATS_DEL(ps, upsd_max_apturn); + DEBUGFS_FWSTATS_DEL(ps, pspoll_max_apturn); + DEBUGFS_FWSTATS_DEL(ps, pspoll_utilization); + DEBUGFS_FWSTATS_DEL(ps, upsd_utilization); + + DEBUGFS_FWSTATS_DEL(rxpipe, rx_prep_beacon_drop); + DEBUGFS_FWSTATS_DEL(rxpipe, descr_host_int_trig_rx_data); + DEBUGFS_FWSTATS_DEL(rxpipe, beacon_buffer_thres_host_int_trig_rx_data); + DEBUGFS_FWSTATS_DEL(rxpipe, missed_beacon_host_int_trig_rx_data); + DEBUGFS_FWSTATS_DEL(rxpipe, tx_xfr_host_int_trig_rx_data); + + DEBUGFS_DEL(tx_queue_len); + DEBUGFS_DEL(retry_count); + DEBUGFS_DEL(excessive_retries); +} + +static int wl12xx_debugfs_add_files(struct wl12xx *wl) +{ + int ret = 0; + + DEBUGFS_FWSTATS_ADD(tx, internal_desc_overflow); + + DEBUGFS_FWSTATS_ADD(rx, out_of_mem); + DEBUGFS_FWSTATS_ADD(rx, hdr_overflow); + DEBUGFS_FWSTATS_ADD(rx, hw_stuck); + DEBUGFS_FWSTATS_ADD(rx, dropped); + DEBUGFS_FWSTATS_ADD(rx, fcs_err); + DEBUGFS_FWSTATS_ADD(rx, xfr_hint_trig); + DEBUGFS_FWSTATS_ADD(rx, path_reset); + DEBUGFS_FWSTATS_ADD(rx, reset_counter); + + DEBUGFS_FWSTATS_ADD(dma, rx_requested); + DEBUGFS_FWSTATS_ADD(dma, rx_errors); + DEBUGFS_FWSTATS_ADD(dma, tx_requested); + DEBUGFS_FWSTATS_ADD(dma, tx_errors); + + DEBUGFS_FWSTATS_ADD(isr, cmd_cmplt); + DEBUGFS_FWSTATS_ADD(isr, fiqs); + DEBUGFS_FWSTATS_ADD(isr, rx_headers); + DEBUGFS_FWSTATS_ADD(isr, rx_mem_overflow); + DEBUGFS_FWSTATS_ADD(isr, rx_rdys); + DEBUGFS_FWSTATS_ADD(isr, irqs); + DEBUGFS_FWSTATS_ADD(isr, tx_procs); + DEBUGFS_FWSTATS_ADD(isr, decrypt_done); + DEBUGFS_FWSTATS_ADD(isr, dma0_done); + DEBUGFS_FWSTATS_ADD(isr, dma1_done); + DEBUGFS_FWSTATS_ADD(isr, tx_exch_complete); + DEBUGFS_FWSTATS_ADD(isr, commands); + DEBUGFS_FWSTATS_ADD(isr, rx_procs); + DEBUGFS_FWSTATS_ADD(isr, hw_pm_mode_changes); + DEBUGFS_FWSTATS_ADD(isr, host_acknowledges); + DEBUGFS_FWSTATS_ADD(isr, pci_pm); + DEBUGFS_FWSTATS_ADD(isr, wakeups); + DEBUGFS_FWSTATS_ADD(isr, low_rssi); + + DEBUGFS_FWSTATS_ADD(wep, addr_key_count); + DEBUGFS_FWSTATS_ADD(wep, default_key_count); + /* skipping wep.reserved */ + DEBUGFS_FWSTATS_ADD(wep, key_not_found); + DEBUGFS_FWSTATS_ADD(wep, decrypt_fail); + DEBUGFS_FWSTATS_ADD(wep, packets); + DEBUGFS_FWSTATS_ADD(wep, interrupt); + + DEBUGFS_FWSTATS_ADD(pwr, ps_enter); + DEBUGFS_FWSTATS_ADD(pwr, elp_enter); + DEBUGFS_FWSTATS_ADD(pwr, missing_bcns); + DEBUGFS_FWSTATS_ADD(pwr, wake_on_host); + DEBUGFS_FWSTATS_ADD(pwr, wake_on_timer_exp); + DEBUGFS_FWSTATS_ADD(pwr, tx_with_ps); + DEBUGFS_FWSTATS_ADD(pwr, tx_without_ps); + DEBUGFS_FWSTATS_ADD(pwr, rcvd_beacons); + DEBUGFS_FWSTATS_ADD(pwr, power_save_off); + DEBUGFS_FWSTATS_ADD(pwr, enable_ps); + DEBUGFS_FWSTATS_ADD(pwr, disable_ps); + DEBUGFS_FWSTATS_ADD(pwr, fix_tsf_ps); + /* skipping cont_miss_bcns_spread for now */ + DEBUGFS_FWSTATS_ADD(pwr, rcvd_awake_beacons); + + DEBUGFS_FWSTATS_ADD(mic, rx_pkts); + DEBUGFS_FWSTATS_ADD(mic, calc_failure); + + DEBUGFS_FWSTATS_ADD(aes, encrypt_fail); + DEBUGFS_FWSTATS_ADD(aes, decrypt_fail); + DEBUGFS_FWSTATS_ADD(aes, encrypt_packets); + DEBUGFS_FWSTATS_ADD(aes, decrypt_packets); + DEBUGFS_FWSTATS_ADD(aes, encrypt_interrupt); + DEBUGFS_FWSTATS_ADD(aes, decrypt_interrupt); + + DEBUGFS_FWSTATS_ADD(event, heart_beat); + DEBUGFS_FWSTATS_ADD(event, calibration); + DEBUGFS_FWSTATS_ADD(event, rx_mismatch); + DEBUGFS_FWSTATS_ADD(event, rx_mem_empty); + DEBUGFS_FWSTATS_ADD(event, rx_pool); + DEBUGFS_FWSTATS_ADD(event, oom_late); + DEBUGFS_FWSTATS_ADD(event, phy_transmit_error); + DEBUGFS_FWSTATS_ADD(event, tx_stuck); + + DEBUGFS_FWSTATS_ADD(ps, pspoll_timeouts); + DEBUGFS_FWSTATS_ADD(ps, upsd_timeouts); + DEBUGFS_FWSTATS_ADD(ps, upsd_max_sptime); + DEBUGFS_FWSTATS_ADD(ps, upsd_max_apturn); + DEBUGFS_FWSTATS_ADD(ps, pspoll_max_apturn); + DEBUGFS_FWSTATS_ADD(ps, pspoll_utilization); + DEBUGFS_FWSTATS_ADD(ps, upsd_utilization); + + DEBUGFS_FWSTATS_ADD(rxpipe, rx_prep_beacon_drop); + DEBUGFS_FWSTATS_ADD(rxpipe, descr_host_int_trig_rx_data); + DEBUGFS_FWSTATS_ADD(rxpipe, beacon_buffer_thres_host_int_trig_rx_data); + DEBUGFS_FWSTATS_ADD(rxpipe, missed_beacon_host_int_trig_rx_data); + DEBUGFS_FWSTATS_ADD(rxpipe, tx_xfr_host_int_trig_rx_data); + + DEBUGFS_ADD(tx_queue_len, wl->debugfs.rootdir); + DEBUGFS_ADD(retry_count, wl->debugfs.rootdir); + DEBUGFS_ADD(excessive_retries, wl->debugfs.rootdir); + +out: + if (ret < 0) + wl12xx_debugfs_delete_files(wl); + + return ret; +} + +void wl12xx_debugfs_reset(struct wl12xx *wl) +{ + memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats)); + wl->stats.retry_count = 0; + wl->stats.excessive_retries = 0; +} + +int wl12xx_debugfs_init(struct wl12xx *wl) +{ + int ret; + + wl->debugfs.rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + if (IS_ERR(wl->debugfs.rootdir)) { + ret = PTR_ERR(wl->debugfs.rootdir); + wl->debugfs.rootdir = NULL; + goto err; + } + + wl->debugfs.fw_statistics = debugfs_create_dir("fw-statistics", + wl->debugfs.rootdir); + + if (IS_ERR(wl->debugfs.fw_statistics)) { + ret = PTR_ERR(wl->debugfs.fw_statistics); + wl->debugfs.fw_statistics = NULL; + goto err_root; + } + + wl->stats.fw_stats = kzalloc(sizeof(*wl->stats.fw_stats), + GFP_KERNEL); + + if (!wl->stats.fw_stats) { + ret = -ENOMEM; + goto err_fw; + } + + wl->stats.fw_stats_update = jiffies; + + ret = wl12xx_debugfs_add_files(wl); + + if (ret < 0) + goto err_file; + + return 0; + +err_file: + kfree(wl->stats.fw_stats); + wl->stats.fw_stats = NULL; + +err_fw: + debugfs_remove(wl->debugfs.fw_statistics); + wl->debugfs.fw_statistics = NULL; + +err_root: + debugfs_remove(wl->debugfs.rootdir); + wl->debugfs.rootdir = NULL; + +err: + return ret; +} + +void wl12xx_debugfs_exit(struct wl12xx *wl) +{ + wl12xx_debugfs_delete_files(wl); + + kfree(wl->stats.fw_stats); + wl->stats.fw_stats = NULL; + + debugfs_remove(wl->debugfs.fw_statistics); + wl->debugfs.fw_statistics = NULL; + + debugfs_remove(wl->debugfs.rootdir); + wl->debugfs.rootdir = NULL; + +} diff --git a/drivers/net/wireless/wl12xx/debugfs.h b/drivers/net/wireless/wl12xx/debugfs.h new file mode 100644 index 00000000000..562cdcbcc87 --- /dev/null +++ b/drivers/net/wireless/wl12xx/debugfs.h @@ -0,0 +1,33 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef WL12XX_DEBUGFS_H +#define WL12XX_DEBUGFS_H + +#include "wl12xx.h" + +int wl12xx_debugfs_init(struct wl12xx *wl); +void wl12xx_debugfs_exit(struct wl12xx *wl); +void wl12xx_debugfs_reset(struct wl12xx *wl); + +#endif /* WL12XX_DEBUGFS_H */ diff --git a/drivers/net/wireless/wl12xx/event.c b/drivers/net/wireless/wl12xx/event.c new file mode 100644 index 00000000000..99529ca89a7 --- /dev/null +++ b/drivers/net/wireless/wl12xx/event.c @@ -0,0 +1,127 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include "wl12xx.h" +#include "reg.h" +#include "spi.h" +#include "event.h" +#include "ps.h" + +static int wl12xx_event_scan_complete(struct wl12xx *wl, + struct event_mailbox *mbox) +{ + wl12xx_debug(DEBUG_EVENT, "status: 0x%x, channels: %d", + mbox->scheduled_scan_status, + mbox->scheduled_scan_channels); + + if (wl->scanning) { + mutex_unlock(&wl->mutex); + ieee80211_scan_completed(wl->hw, false); + mutex_lock(&wl->mutex); + wl->scanning = false; + } + + return 0; +} + +static void wl12xx_event_mbox_dump(struct event_mailbox *mbox) +{ + wl12xx_debug(DEBUG_EVENT, "MBOX DUMP:"); + wl12xx_debug(DEBUG_EVENT, "\tvector: 0x%x", mbox->events_vector); + wl12xx_debug(DEBUG_EVENT, "\tmask: 0x%x", mbox->events_mask); +} + +static int wl12xx_event_process(struct wl12xx *wl, struct event_mailbox *mbox) +{ + int ret; + u32 vector; + + wl12xx_event_mbox_dump(mbox); + + vector = mbox->events_vector & ~(mbox->events_mask); + wl12xx_debug(DEBUG_EVENT, "vector: 0x%x", vector); + + if (vector & SCAN_COMPLETE_EVENT_ID) { + ret = wl12xx_event_scan_complete(wl, mbox); + if (ret < 0) + return ret; + } + + if (vector & BSS_LOSE_EVENT_ID) { + wl12xx_debug(DEBUG_EVENT, "BSS_LOSE_EVENT"); + + if (wl->psm_requested && wl->psm) { + ret = wl12xx_ps_set_mode(wl, STATION_ACTIVE_MODE); + if (ret < 0) + return ret; + } + } + + return 0; +} + +int wl12xx_event_unmask(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_event_mbox_mask(wl, ~(wl->event_mask)); + if (ret < 0) + return ret; + + return 0; +} + +void wl12xx_event_mbox_config(struct wl12xx *wl) +{ + wl->mbox_ptr[0] = wl12xx_reg_read32(wl, REG_EVENT_MAILBOX_PTR); + wl->mbox_ptr[1] = wl->mbox_ptr[0] + sizeof(struct event_mailbox); + + wl12xx_debug(DEBUG_EVENT, "MBOX ptrs: 0x%x 0x%x", + wl->mbox_ptr[0], wl->mbox_ptr[1]); +} + +int wl12xx_event_handle(struct wl12xx *wl, u8 mbox_num) +{ + struct event_mailbox mbox; + int ret; + + wl12xx_debug(DEBUG_EVENT, "EVENT on mbox %d", mbox_num); + + if (mbox_num > 1) + return -EINVAL; + + /* first we read the mbox descriptor */ + wl12xx_spi_mem_read(wl, wl->mbox_ptr[mbox_num], &mbox, + sizeof(struct event_mailbox)); + + /* process the descriptor */ + ret = wl12xx_event_process(wl, &mbox); + if (ret < 0) + return ret; + + /* then we let the firmware know it can go on...*/ + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_TRIG, INTR_TRIG_EVENT_ACK); + + return 0; +} diff --git a/drivers/net/wireless/wl12xx/event.h b/drivers/net/wireless/wl12xx/event.h new file mode 100644 index 00000000000..1f4c2f7438a --- /dev/null +++ b/drivers/net/wireless/wl12xx/event.h @@ -0,0 +1,121 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_EVENT_H__ +#define __WL12XX_EVENT_H__ + +/* + * Mbox events + * + * The event mechanism is based on a pair of event buffers (buffers A and + * B) at fixed locations in the target's memory. The host processes one + * buffer while the other buffer continues to collect events. If the host + * is not processing events, an interrupt is issued to signal that a buffer + * is ready. Once the host is done with processing events from one buffer, + * it signals the target (with an ACK interrupt) that the event buffer is + * free. + */ + +enum { + RESERVED1_EVENT_ID = BIT(0), + RESERVED2_EVENT_ID = BIT(1), + MEASUREMENT_START_EVENT_ID = BIT(2), + SCAN_COMPLETE_EVENT_ID = BIT(3), + CALIBRATION_COMPLETE_EVENT_ID = BIT(4), + ROAMING_TRIGGER_LOW_RSSI_EVENT_ID = BIT(5), + PS_REPORT_EVENT_ID = BIT(6), + SYNCHRONIZATION_TIMEOUT_EVENT_ID = BIT(7), + HEALTH_REPORT_EVENT_ID = BIT(8), + ACI_DETECTION_EVENT_ID = BIT(9), + DEBUG_REPORT_EVENT_ID = BIT(10), + MAC_STATUS_EVENT_ID = BIT(11), + DISCONNECT_EVENT_COMPLETE_ID = BIT(12), + JOIN_EVENT_COMPLETE_ID = BIT(13), + CHANNEL_SWITCH_COMPLETE_EVENT_ID = BIT(14), + BSS_LOSE_EVENT_ID = BIT(15), + ROAMING_TRIGGER_MAX_TX_RETRY_EVENT_ID = BIT(16), + MEASUREMENT_COMPLETE_EVENT_ID = BIT(17), + AP_DISCOVERY_COMPLETE_EVENT_ID = BIT(18), + SCHEDULED_SCAN_COMPLETE_EVENT_ID = BIT(19), + PSPOLL_DELIVERY_FAILURE_EVENT_ID = BIT(20), + RESET_BSS_EVENT_ID = BIT(21), + REGAINED_BSS_EVENT_ID = BIT(22), + ROAMING_TRIGGER_REGAINED_RSSI_EVENT_ID = BIT(23), + ROAMING_TRIGGER_LOW_SNR_EVENT_ID = BIT(24), + ROAMING_TRIGGER_REGAINED_SNR_EVENT_ID = BIT(25), + + DBG_EVENT_ID = BIT(26), + BT_PTA_SENSE_EVENT_ID = BIT(27), + BT_PTA_PREDICTION_EVENT_ID = BIT(28), + BT_PTA_AVALANCHE_EVENT_ID = BIT(29), + + PLT_RX_CALIBRATION_COMPLETE_EVENT_ID = BIT(30), + + EVENT_MBOX_ALL_EVENT_ID = 0x7fffffff, +}; + +struct event_debug_report { + u8 debug_event_id; + u8 num_params; + u16 pad; + u32 report_1; + u32 report_2; + u32 report_3; +} __attribute__ ((packed)); + +struct event_mailbox { + u32 events_vector; + u32 events_mask; + u32 reserved_1; + u32 reserved_2; + + char average_rssi_level; + u8 ps_status; + u8 channel_switch_status; + u8 scheduled_scan_status; + + /* Channels scanned by the scheduled scan */ + u16 scheduled_scan_channels; + + /* If bit 0 is set -> target's fatal error */ + u16 health_report; + u16 bad_fft_counter; + u8 bt_pta_sense_info; + u8 bt_pta_protective_info; + u32 reserved; + u32 debug_report[2]; + + /* Number of FCS errors since last event */ + u32 fcs_err_counter; + + struct event_debug_report report; + u8 average_snr_level; + u8 padding[19]; +} __attribute__ ((packed)); + +int wl12xx_event_unmask(struct wl12xx *wl); +void wl12xx_event_mbox_config(struct wl12xx *wl); +int wl12xx_event_handle(struct wl12xx *wl, u8 mbox); + +#endif diff --git a/drivers/net/wireless/wl12xx/init.c b/drivers/net/wireless/wl12xx/init.c new file mode 100644 index 00000000000..2a573a6010b --- /dev/null +++ b/drivers/net/wireless/wl12xx/init.c @@ -0,0 +1,200 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include + +#include "init.h" +#include "wl12xx_80211.h" +#include "acx.h" +#include "cmd.h" + +int wl12xx_hw_init_hwenc_config(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_feature_cfg(wl); + if (ret < 0) { + wl12xx_warning("couldn't set feature config"); + return ret; + } + + ret = wl12xx_acx_default_key(wl, wl->default_key); + if (ret < 0) { + wl12xx_warning("couldn't set default key"); + return ret; + } + + return 0; +} + +int wl12xx_hw_init_templates_config(struct wl12xx *wl) +{ + int ret; + u8 partial_vbm[PARTIAL_VBM_MAX]; + + /* send empty templates for fw memory reservation */ + ret = wl12xx_cmd_template_set(wl, CMD_PROBE_REQ, NULL, + sizeof(struct wl12xx_probe_req_template)); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_template_set(wl, CMD_NULL_DATA, NULL, + sizeof(struct wl12xx_null_data_template)); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_template_set(wl, CMD_PS_POLL, NULL, + sizeof(struct wl12xx_ps_poll_template)); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_template_set(wl, CMD_QOS_NULL_DATA, NULL, + sizeof + (struct wl12xx_qos_null_data_template)); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_template_set(wl, CMD_PROBE_RESP, NULL, + sizeof + (struct wl12xx_probe_resp_template)); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_template_set(wl, CMD_BEACON, NULL, + sizeof + (struct wl12xx_beacon_template)); + if (ret < 0) + return ret; + + /* tim templates, first reserve space then allocate an empty one */ + memset(partial_vbm, 0, PARTIAL_VBM_MAX); + ret = wl12xx_cmd_vbm(wl, TIM_ELE_ID, partial_vbm, PARTIAL_VBM_MAX, 0); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_vbm(wl, TIM_ELE_ID, partial_vbm, 1, 0); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_rx_config(struct wl12xx *wl, u32 config, u32 filter) +{ + int ret; + + ret = wl12xx_acx_rx_msdu_life_time(wl, RX_MSDU_LIFETIME_DEF); + if (ret < 0) + return ret; + + ret = wl12xx_acx_rx_config(wl, config, filter); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_phy_config(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_pd_threshold(wl); + if (ret < 0) + return ret; + + ret = wl12xx_acx_slot(wl, DEFAULT_SLOT_TIME); + if (ret < 0) + return ret; + + ret = wl12xx_acx_group_address_tbl(wl); + if (ret < 0) + return ret; + + ret = wl12xx_acx_service_period_timeout(wl); + if (ret < 0) + return ret; + + ret = wl12xx_acx_rts_threshold(wl, RTS_THRESHOLD_DEF); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_beacon_filter(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_beacon_filter_opt(wl); + if (ret < 0) + return ret; + + ret = wl12xx_acx_beacon_filter_table(wl); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_pta(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_sg_enable(wl); + if (ret < 0) + return ret; + + ret = wl12xx_acx_sg_cfg(wl); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_energy_detection(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_cca_threshold(wl); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_beacon_broadcast(struct wl12xx *wl) +{ + int ret; + + ret = wl12xx_acx_bcn_dtim_options(wl); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_hw_init_power_auth(struct wl12xx *wl) +{ + return wl12xx_acx_sleep_auth(wl, WL12XX_PSM_CAM); +} diff --git a/drivers/net/wireless/wl12xx/init.h b/drivers/net/wireless/wl12xx/init.h new file mode 100644 index 00000000000..c8b6cd0b7c3 --- /dev/null +++ b/drivers/net/wireless/wl12xx/init.h @@ -0,0 +1,40 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_INIT_H__ +#define __WL12XX_INIT_H__ + +#include "wl12xx.h" + +int wl12xx_hw_init_hwenc_config(struct wl12xx *wl); +int wl12xx_hw_init_templates_config(struct wl12xx *wl); +int wl12xx_hw_init_mem_config(struct wl12xx *wl); +int wl12xx_hw_init_rx_config(struct wl12xx *wl, u32 config, u32 filter); +int wl12xx_hw_init_phy_config(struct wl12xx *wl); +int wl12xx_hw_init_beacon_filter(struct wl12xx *wl); +int wl12xx_hw_init_pta(struct wl12xx *wl); +int wl12xx_hw_init_energy_detection(struct wl12xx *wl); +int wl12xx_hw_init_beacon_broadcast(struct wl12xx *wl); +int wl12xx_hw_init_power_auth(struct wl12xx *wl); + +#endif diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c new file mode 100644 index 00000000000..744f4ce5993 --- /dev/null +++ b/drivers/net/wireless/wl12xx/main.c @@ -0,0 +1,1358 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wl12xx.h" +#include "wl12xx_80211.h" +#include "reg.h" +#include "wl1251.h" +#include "spi.h" +#include "event.h" +#include "tx.h" +#include "rx.h" +#include "ps.h" +#include "init.h" +#include "debugfs.h" + +static void wl12xx_disable_interrupts(struct wl12xx *wl) +{ + disable_irq(wl->irq); +} + +static void wl12xx_power_off(struct wl12xx *wl) +{ + wl->set_power(false); +} + +static void wl12xx_power_on(struct wl12xx *wl) +{ + wl->set_power(true); +} + +static irqreturn_t wl12xx_irq(int irq, void *cookie) +{ + struct wl12xx *wl; + + wl12xx_debug(DEBUG_IRQ, "IRQ"); + + wl = cookie; + + schedule_work(&wl->irq_work); + + return IRQ_HANDLED; +} + +static int wl12xx_fetch_firmware(struct wl12xx *wl) +{ + const struct firmware *fw; + int ret; + + ret = request_firmware(&fw, wl->chip.fw_filename, &wl->spi->dev); + + if (ret < 0) { + wl12xx_error("could not get firmware: %d", ret); + return ret; + } + + if (fw->size % 4) { + wl12xx_error("firmware size is not multiple of 32 bits: %d", + fw->size); + ret = -EILSEQ; + goto out; + } + + wl->fw_len = fw->size; + wl->fw = kmalloc(wl->fw_len, GFP_KERNEL); + + if (!wl->fw) { + wl12xx_error("could not allocate memory for the firmware"); + ret = -ENOMEM; + goto out; + } + + memcpy(wl->fw, fw->data, wl->fw_len); + + ret = 0; + +out: + release_firmware(fw); + + return ret; +} + +static int wl12xx_fetch_nvs(struct wl12xx *wl) +{ + const struct firmware *fw; + int ret; + + ret = request_firmware(&fw, wl->chip.nvs_filename, &wl->spi->dev); + + if (ret < 0) { + wl12xx_error("could not get nvs file: %d", ret); + return ret; + } + + if (fw->size % 4) { + wl12xx_error("nvs size is not multiple of 32 bits: %d", + fw->size); + ret = -EILSEQ; + goto out; + } + + wl->nvs_len = fw->size; + wl->nvs = kmalloc(wl->nvs_len, GFP_KERNEL); + + if (!wl->nvs) { + wl12xx_error("could not allocate memory for the nvs file"); + ret = -ENOMEM; + goto out; + } + + memcpy(wl->nvs, fw->data, wl->nvs_len); + + ret = 0; + +out: + release_firmware(fw); + + return ret; +} + +static void wl12xx_fw_wakeup(struct wl12xx *wl) +{ + u32 elp_reg; + + elp_reg = ELPCTRL_WAKE_UP; + wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, elp_reg); + elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR); + + if (!(elp_reg & ELPCTRL_WLAN_READY)) { + wl12xx_warning("WLAN not ready"); + elp_reg = ELPCTRL_WAKE_UP_WLAN_READY; + wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, elp_reg); + } +} + +static int wl12xx_chip_wakeup(struct wl12xx *wl) +{ + int ret = 0; + + wl12xx_power_on(wl); + msleep(wl->chip.power_on_sleep); + wl12xx_spi_reset(wl); + wl12xx_spi_init(wl); + + /* We don't need a real memory partition here, because we only want + * to use the registers at this point. */ + wl12xx_set_partition(wl, + 0x00000000, + 0x00000000, + REGISTERS_BASE, + REGISTERS_DOWN_SIZE); + + /* ELP module wake up */ + wl12xx_fw_wakeup(wl); + + /* whal_FwCtrl_BootSm() */ + + /* 0. read chip id from CHIP_ID */ + wl->chip.id = wl12xx_reg_read32(wl, CHIP_ID_B); + + /* 1. check if chip id is valid */ + + switch (wl->chip.id) { + case CHIP_ID_1251_PG12: + wl12xx_debug(DEBUG_BOOT, "chip id 0x%x (1251 PG12)", + wl->chip.id); + + wl1251_setup(wl); + + break; + case CHIP_ID_1271_PG10: + case CHIP_ID_1251_PG10: + case CHIP_ID_1251_PG11: + default: + wl12xx_error("unsupported chip id: 0x%x", wl->chip.id); + ret = -ENODEV; + goto out; + } + + if (wl->fw == NULL) { + ret = wl12xx_fetch_firmware(wl); + if (ret < 0) + goto out; + } + + /* No NVS from netlink, try to get it from the filesystem */ + if (wl->nvs == NULL) { + ret = wl12xx_fetch_nvs(wl); + if (ret < 0) + goto out; + } + +out: + return ret; +} + +static void wl12xx_filter_work(struct work_struct *work) +{ + struct wl12xx *wl = + container_of(work, struct wl12xx, filter_work); + int ret; + + mutex_lock(&wl->mutex); + + if (wl->state == WL12XX_STATE_OFF) + goto out; + + ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0); + if (ret < 0) + goto out; + +out: + mutex_unlock(&wl->mutex); +} + +int wl12xx_plt_start(struct wl12xx *wl) +{ + int ret; + + wl12xx_notice("power up"); + + if (wl->state != WL12XX_STATE_OFF) { + wl12xx_error("cannot go into PLT state because not " + "in off state: %d", wl->state); + return -EBUSY; + } + + wl->state = WL12XX_STATE_PLT; + + ret = wl12xx_chip_wakeup(wl); + if (ret < 0) + return ret; + + ret = wl->chip.op_boot(wl); + if (ret < 0) + return ret; + + wl12xx_notice("firmware booted in PLT mode (%s)", wl->chip.fw_ver); + + ret = wl->chip.op_plt_init(wl); + if (ret < 0) + return ret; + + return 0; +} + +int wl12xx_plt_stop(struct wl12xx *wl) +{ + wl12xx_notice("power down"); + + if (wl->state != WL12XX_STATE_PLT) { + wl12xx_error("cannot power down because not in PLT " + "state: %d", wl->state); + return -EBUSY; + } + + wl12xx_disable_interrupts(wl); + wl12xx_power_off(wl); + + wl->state = WL12XX_STATE_OFF; + + return 0; +} + + +static int wl12xx_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct wl12xx *wl = hw->priv; + + skb_queue_tail(&wl->tx_queue, skb); + + schedule_work(&wl->tx_work); + + /* + * The workqueue is slow to process the tx_queue and we need stop + * the queue here, otherwise the queue will get too long. + */ + if (skb_queue_len(&wl->tx_queue) >= WL12XX_TX_QUEUE_MAX_LENGTH) { + ieee80211_stop_queues(wl->hw); + + /* + * FIXME: this is racy, the variable is not properly + * protected. Maybe fix this by removing the stupid + * variable altogether and checking the real queue state? + */ + wl->tx_queue_stopped = true; + } + + return NETDEV_TX_OK; +} + +static int wl12xx_op_start(struct ieee80211_hw *hw) +{ + struct wl12xx *wl = hw->priv; + int ret = 0; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 start"); + + mutex_lock(&wl->mutex); + + if (wl->state != WL12XX_STATE_OFF) { + wl12xx_error("cannot start because not in off state: %d", + wl->state); + ret = -EBUSY; + goto out; + } + + ret = wl12xx_chip_wakeup(wl); + if (ret < 0) + return ret; + + ret = wl->chip.op_boot(wl); + if (ret < 0) + goto out; + + ret = wl->chip.op_hw_init(wl); + if (ret < 0) + goto out; + + ret = wl12xx_acx_station_id(wl); + if (ret < 0) + goto out; + + wl->state = WL12XX_STATE_ON; + + wl12xx_info("firmware booted (%s)", wl->chip.fw_ver); + +out: + if (ret < 0) + wl12xx_power_off(wl); + + mutex_unlock(&wl->mutex); + + return ret; +} + +static void wl12xx_op_stop(struct ieee80211_hw *hw) +{ + struct wl12xx *wl = hw->priv; + + wl12xx_info("down"); + + wl12xx_debug(DEBUG_MAC80211, "mac80211 stop"); + + mutex_lock(&wl->mutex); + + WARN_ON(wl->state != WL12XX_STATE_ON); + + if (wl->scanning) { + mutex_unlock(&wl->mutex); + ieee80211_scan_completed(wl->hw, true); + mutex_lock(&wl->mutex); + wl->scanning = false; + } + + wl->state = WL12XX_STATE_OFF; + + wl12xx_disable_interrupts(wl); + + mutex_unlock(&wl->mutex); + + cancel_work_sync(&wl->irq_work); + cancel_work_sync(&wl->tx_work); + cancel_work_sync(&wl->filter_work); + + mutex_lock(&wl->mutex); + + /* let's notify MAC80211 about the remaining pending TX frames */ + wl12xx_tx_flush(wl); + + wl12xx_power_off(wl); + + memset(wl->bssid, 0, ETH_ALEN); + wl->listen_int = 1; + wl->bss_type = MAX_BSS_TYPE; + + wl->data_in_count = 0; + wl->rx_counter = 0; + wl->rx_handled = 0; + wl->rx_current_buffer = 0; + wl->rx_last_id = 0; + wl->next_tx_complete = 0; + wl->elp = false; + wl->psm = 0; + wl->tx_queue_stopped = false; + wl->power_level = WL12XX_DEFAULT_POWER_LEVEL; + + wl12xx_debugfs_reset(wl); + + mutex_unlock(&wl->mutex); +} + +static int wl12xx_op_add_interface(struct ieee80211_hw *hw, + struct ieee80211_if_init_conf *conf) +{ + struct wl12xx *wl = hw->priv; + DECLARE_MAC_BUF(mac); + int ret = 0; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %s", + conf->type, print_mac(mac, conf->mac_addr)); + + mutex_lock(&wl->mutex); + + switch (conf->type) { + case NL80211_IFTYPE_STATION: + wl->bss_type = BSS_TYPE_STA_BSS; + break; + case NL80211_IFTYPE_ADHOC: + wl->bss_type = BSS_TYPE_IBSS; + break; + default: + ret = -EOPNOTSUPP; + goto out; + } + + if (memcmp(wl->mac_addr, conf->mac_addr, ETH_ALEN)) { + memcpy(wl->mac_addr, conf->mac_addr, ETH_ALEN); + SET_IEEE80211_PERM_ADDR(wl->hw, wl->mac_addr); + ret = wl12xx_acx_station_id(wl); + if (ret < 0) + goto out; + } + +out: + mutex_unlock(&wl->mutex); + return ret; +} + +static void wl12xx_op_remove_interface(struct ieee80211_hw *hw, + struct ieee80211_if_init_conf *conf) +{ + wl12xx_debug(DEBUG_MAC80211, "mac80211 remove interface"); +} + +static int wl12xx_build_null_data(struct wl12xx *wl) +{ + struct wl12xx_null_data_template template; + + if (!is_zero_ether_addr(wl->bssid)) { + memcpy(template.header.da, wl->bssid, ETH_ALEN); + memcpy(template.header.bssid, wl->bssid, ETH_ALEN); + } else { + memset(template.header.da, 0xff, ETH_ALEN); + memset(template.header.bssid, 0xff, ETH_ALEN); + } + + memcpy(template.header.sa, wl->mac_addr, ETH_ALEN); + template.header.frame_ctl = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC); + + return wl12xx_cmd_template_set(wl, CMD_NULL_DATA, &template, + sizeof(template)); + +} + +static int wl12xx_build_ps_poll(struct wl12xx *wl, u16 aid) +{ + struct wl12xx_ps_poll_template template; + + memcpy(template.bssid, wl->bssid, ETH_ALEN); + memcpy(template.ta, wl->mac_addr, ETH_ALEN); + template.aid = aid; + template.fc = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); + + return wl12xx_cmd_template_set(wl, CMD_PS_POLL, &template, + sizeof(template)); + +} + +static int wl12xx_op_config(struct ieee80211_hw *hw, u32 changed) +{ + struct wl12xx *wl = hw->priv; + struct ieee80211_conf *conf = &hw->conf; + int channel, ret = 0; + + channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + + wl12xx_debug(DEBUG_MAC80211, "mac80211 config ch %d psm %s power %d", + channel, + conf->flags & IEEE80211_CONF_PS ? "on" : "off", + conf->power_level); + + mutex_lock(&wl->mutex); + + if (channel != wl->channel) { + /* FIXME: use beacon interval provided by mac80211 */ + ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0); + if (ret < 0) + goto out; + + wl->channel = channel; + } + + ret = wl12xx_build_null_data(wl); + if (ret < 0) + goto out; + + if (conf->flags & IEEE80211_CONF_PS && !wl->psm_requested) { + wl12xx_info("psm enabled"); + + wl->psm_requested = true; + + /* + * We enter PSM only if we're already associated. + * If we're not, we'll enter it when joining an SSID, + * through the bss_info_changed() hook. + */ + ret = wl12xx_ps_set_mode(wl, STATION_POWER_SAVE_MODE); + } else if (!(conf->flags & IEEE80211_CONF_PS) && + wl->psm_requested) { + wl12xx_info("psm disabled"); + + wl->psm_requested = false; + + if (wl->psm) + ret = wl12xx_ps_set_mode(wl, STATION_ACTIVE_MODE); + } + + if (conf->power_level != wl->power_level) { + ret = wl12xx_acx_tx_power(wl, conf->power_level); + if (ret < 0) + goto out; + + wl->power_level = conf->power_level; + } + +out: + mutex_unlock(&wl->mutex); + return ret; +} + +#define WL12XX_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ + FIF_ALLMULTI | \ + FIF_FCSFAIL | \ + FIF_BCN_PRBRESP_PROMISC | \ + FIF_CONTROL | \ + FIF_OTHER_BSS) + +static void wl12xx_op_configure_filter(struct ieee80211_hw *hw, + unsigned int changed, + unsigned int *total, + int mc_count, + struct dev_addr_list *mc_list) +{ + struct wl12xx *wl = hw->priv; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 configure filter"); + + *total &= WL12XX_SUPPORTED_FILTERS; + changed &= WL12XX_SUPPORTED_FILTERS; + + if (changed == 0) + /* no filters which we support changed */ + return; + + /* FIXME: wl->rx_config and wl->rx_filter are not protected */ + + wl->rx_config = WL12XX_DEFAULT_RX_CONFIG; + wl->rx_filter = WL12XX_DEFAULT_RX_FILTER; + + if (*total & FIF_PROMISC_IN_BSS) { + wl->rx_config |= CFG_BSSID_FILTER_EN; + wl->rx_config |= CFG_RX_ALL_GOOD; + } + if (*total & FIF_ALLMULTI) + /* + * CFG_MC_FILTER_EN in rx_config needs to be 0 to receive + * all multicast frames + */ + wl->rx_config &= ~CFG_MC_FILTER_EN; + if (*total & FIF_FCSFAIL) + wl->rx_filter |= CFG_RX_FCS_ERROR; + if (*total & FIF_BCN_PRBRESP_PROMISC) { + wl->rx_config &= ~CFG_BSSID_FILTER_EN; + wl->rx_config &= ~CFG_SSID_FILTER_EN; + } + if (*total & FIF_CONTROL) + wl->rx_filter |= CFG_RX_CTL_EN; + if (*total & FIF_OTHER_BSS) + wl->rx_filter &= ~CFG_BSSID_FILTER_EN; + + /* + * FIXME: workqueues need to be properly cancelled on stop(), for + * now let's just disable changing the filter settings. They will + * be updated any on config(). + */ + /* schedule_work(&wl->filter_work); */ +} + +/* HW encryption */ +static int wl12xx_set_key_type(struct wl12xx *wl, struct acx_set_key *key, + enum set_key_cmd cmd, + struct ieee80211_key_conf *mac80211_key, + const u8 *addr) +{ + switch (mac80211_key->alg) { + case ALG_WEP: + if (is_broadcast_ether_addr(addr)) + key->key_type = KEY_WEP_DEFAULT; + else + key->key_type = KEY_WEP_ADDR; + + mac80211_key->hw_key_idx = mac80211_key->keyidx; + break; + case ALG_TKIP: + if (is_broadcast_ether_addr(addr)) + key->key_type = KEY_TKIP_MIC_GROUP; + else + key->key_type = KEY_TKIP_MIC_PAIRWISE; + + mac80211_key->hw_key_idx = mac80211_key->keyidx; + break; + case ALG_CCMP: + if (is_broadcast_ether_addr(addr)) + key->key_type = KEY_AES_GROUP; + else + key->key_type = KEY_AES_PAIRWISE; + mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; + break; + default: + wl12xx_error("Unknown key algo 0x%x", mac80211_key->alg); + return -EOPNOTSUPP; + } + + return 0; +} + +static int wl12xx_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + struct wl12xx *wl = hw->priv; + struct acx_set_key wl_key; + const u8 *addr; + int ret; + + static const u8 bcast_addr[ETH_ALEN] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 set key"); + + memset(&wl_key, 0, sizeof(wl_key)); + + addr = sta ? sta->addr : bcast_addr; + + wl12xx_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); + wl12xx_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); + wl12xx_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", + key->alg, key->keyidx, key->keylen, key->flags); + wl12xx_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen); + + mutex_lock(&wl->mutex); + + switch (cmd) { + case SET_KEY: + wl_key.key_action = KEY_ADD_OR_REPLACE; + break; + case DISABLE_KEY: + wl_key.key_action = KEY_REMOVE; + break; + default: + wl12xx_error("Unsupported key cmd 0x%x", cmd); + break; + } + + ret = wl12xx_set_key_type(wl, &wl_key, cmd, key, addr); + if (ret < 0) { + wl12xx_error("Set KEY type failed"); + goto out; + } + + if (wl_key.key_type != KEY_WEP_DEFAULT) + memcpy(wl_key.addr, addr, ETH_ALEN); + + if ((wl_key.key_type == KEY_TKIP_MIC_GROUP) || + (wl_key.key_type == KEY_TKIP_MIC_PAIRWISE)) { + /* + * We get the key in the following form: + * TKIP (16 bytes) - TX MIC (8 bytes) - RX MIC (8 bytes) + * but the target is expecting: + * TKIP - RX MIC - TX MIC + */ + memcpy(wl_key.key, key->key, 16); + memcpy(wl_key.key + 16, key->key + 24, 8); + memcpy(wl_key.key + 24, key->key + 16, 8); + + } else { + memcpy(wl_key.key, key->key, key->keylen); + } + wl_key.key_size = key->keylen; + + wl_key.id = key->keyidx; + wl_key.ssid_profile = 0; + + wl12xx_dump(DEBUG_CRYPT, "TARGET KEY: ", &wl_key, sizeof(wl_key)); + + if (wl12xx_cmd_send(wl, CMD_SET_KEYS, &wl_key, sizeof(wl_key)) < 0) { + wl12xx_error("Set KEY failed"); + ret = -EOPNOTSUPP; + goto out; + } + +out: + mutex_unlock(&wl->mutex); + return ret; +} + +static int wl12xx_build_basic_rates(char *rates) +{ + u8 index = 0; + + rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_1MB; + rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_2MB; + rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_5MB; + rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_11MB; + + return index; +} + +static int wl12xx_build_extended_rates(char *rates) +{ + u8 index = 0; + + rates[index++] = IEEE80211_OFDM_RATE_6MB; + rates[index++] = IEEE80211_OFDM_RATE_9MB; + rates[index++] = IEEE80211_OFDM_RATE_12MB; + rates[index++] = IEEE80211_OFDM_RATE_18MB; + rates[index++] = IEEE80211_OFDM_RATE_24MB; + rates[index++] = IEEE80211_OFDM_RATE_36MB; + rates[index++] = IEEE80211_OFDM_RATE_48MB; + rates[index++] = IEEE80211_OFDM_RATE_54MB; + + return index; +} + + +static int wl12xx_build_probe_req(struct wl12xx *wl, u8 *ssid, size_t ssid_len) +{ + struct wl12xx_probe_req_template template; + struct wl12xx_ie_rates *rates; + char *ptr; + u16 size; + + ptr = (char *)&template; + size = sizeof(struct ieee80211_header); + + memset(template.header.da, 0xff, ETH_ALEN); + memset(template.header.bssid, 0xff, ETH_ALEN); + memcpy(template.header.sa, wl->mac_addr, ETH_ALEN); + template.header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_PROBE_REQ); + + /* IEs */ + /* SSID */ + template.ssid.header.id = WLAN_EID_SSID; + template.ssid.header.len = ssid_len; + if (ssid_len && ssid) + memcpy(template.ssid.ssid, ssid, ssid_len); + size += sizeof(struct wl12xx_ie_header) + ssid_len; + ptr += size; + + /* Basic Rates */ + rates = (struct wl12xx_ie_rates *)ptr; + rates->header.id = WLAN_EID_SUPP_RATES; + rates->header.len = wl12xx_build_basic_rates(rates->rates); + size += sizeof(struct wl12xx_ie_header) + rates->header.len; + ptr += sizeof(struct wl12xx_ie_header) + rates->header.len; + + /* Extended rates */ + rates = (struct wl12xx_ie_rates *)ptr; + rates->header.id = WLAN_EID_EXT_SUPP_RATES; + rates->header.len = wl12xx_build_extended_rates(rates->rates); + size += sizeof(struct wl12xx_ie_header) + rates->header.len; + + wl12xx_dump(DEBUG_SCAN, "PROBE REQ: ", &template, size); + + return wl12xx_cmd_template_set(wl, CMD_PROBE_REQ, &template, + size); +} + +static int wl12xx_hw_scan(struct wl12xx *wl, u8 *ssid, size_t len, + u8 active_scan, u8 high_prio, u8 num_channels, + u8 probe_requests) +{ + int i, ret; + u32 split_scan = 0; + u16 scan_options = 0; + struct cmd_scan *params; + struct wl12xx_command *cmd_answer; + + if (wl->scanning) + return -EINVAL; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + params->params.rx_config_options = cpu_to_le32(CFG_RX_ALL_GOOD); + params->params.rx_filter_options = + cpu_to_le32(CFG_RX_PRSP_EN | CFG_RX_MGMT_EN | CFG_RX_BCN_EN); + + /* High priority scan */ + if (!active_scan) + scan_options |= SCAN_PASSIVE; + if (high_prio) + scan_options |= SCAN_PRIORITY_HIGH; + params->params.scan_options = scan_options; + + params->params.num_channels = num_channels; + params->params.num_probe_requests = probe_requests; + params->params.tx_rate = cpu_to_le16(1 << 1); /* 2 Mbps */ + params->params.tid_trigger = 0; + + for (i = 0; i < num_channels; i++) { + params->channels[i].min_duration = cpu_to_le32(30000); + params->channels[i].max_duration = cpu_to_le32(60000); + memset(¶ms->channels[i].bssid_lsb, 0xff, 4); + memset(¶ms->channels[i].bssid_msb, 0xff, 2); + params->channels[i].early_termination = 0; + params->channels[i].tx_power_att = 0; + params->channels[i].channel = i + 1; + memset(params->channels[i].pad, 0, 3); + } + + for (i = num_channels; i < SCAN_MAX_NUM_OF_CHANNELS; i++) + memset(¶ms->channels[i], 0, + sizeof(struct basic_scan_channel_parameters)); + + if (len && ssid) { + params->params.ssid_len = len; + memcpy(params->params.ssid, ssid, len); + } else { + params->params.ssid_len = 0; + memset(params->params.ssid, 0, 32); + } + + ret = wl12xx_build_probe_req(wl, ssid, len); + if (ret < 0) { + wl12xx_error("PROBE request template failed"); + goto out; + } + + ret = wl12xx_cmd_send(wl, CMD_TRIGGER_SCAN_TO, &split_scan, + sizeof(u32)); + if (ret < 0) { + wl12xx_error("Split SCAN failed"); + goto out; + } + + wl12xx_dump(DEBUG_SCAN, "SCAN: ", params, sizeof(*params)); + + wl->scanning = true; + + ret = wl12xx_cmd_send(wl, CMD_SCAN, params, sizeof(*params)); + if (ret < 0) + wl12xx_error("SCAN failed"); + + wl12xx_spi_mem_read(wl, wl->cmd_box_addr, params, sizeof(*params)); + + cmd_answer = (struct wl12xx_command *) params; + if (cmd_answer->status != CMD_STATUS_SUCCESS) { + wl12xx_error("TEST command answer error: %d", + cmd_answer->status); + wl->scanning = false; + ret = -EIO; + goto out; + } + +out: + kfree(params); + return ret; + +} + +static int wl12xx_op_hw_scan(struct ieee80211_hw *hw, + struct cfg80211_scan_request *req) +{ + struct wl12xx *wl = hw->priv; + int ret; + u8 *ssid = NULL; + size_t ssid_len = 0; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 hw scan"); + + if (req->n_ssids) { + ssid = req->ssids[0].ssid; + ssid_len = req->ssids[0].ssid_len; + } + + mutex_lock(&wl->mutex); + ret = wl12xx_hw_scan(hw->priv, ssid, ssid_len, 1, 0, 13, 3); + mutex_unlock(&wl->mutex); + + return ret; +} + +static int wl12xx_op_set_rts_threshold(struct ieee80211_hw *hw, u32 value) +{ + struct wl12xx *wl = hw->priv; + int ret; + + ret = wl12xx_acx_rts_threshold(wl, (u16) value); + + if (ret < 0) + wl12xx_warning("wl12xx_op_set_rts_threshold failed: %d", ret); + + return ret; +} + +static void wl12xx_op_bss_info_changed(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *bss_conf, + u32 changed) +{ + enum acx_ps_mode mode; + struct wl12xx *wl = hw->priv; + struct sk_buff *beacon; + int ret; + + wl12xx_debug(DEBUG_MAC80211, "mac80211 bss info changed"); + + mutex_lock(&wl->mutex); + + if (changed & BSS_CHANGED_ASSOC) { + if (bss_conf->assoc) { + wl->aid = bss_conf->aid; + + ret = wl12xx_build_ps_poll(wl, wl->aid); + if (ret < 0) + goto out; + + ret = wl12xx_acx_aid(wl, wl->aid); + if (ret < 0) + goto out; + + /* If we want to go in PSM but we're not there yet */ + if (wl->psm_requested && !wl->psm) { + mode = STATION_POWER_SAVE_MODE; + ret = wl12xx_ps_set_mode(wl, mode); + if (ret < 0) + goto out; + } + } + } + if (changed & BSS_CHANGED_ERP_SLOT) { + if (bss_conf->use_short_slot) + ret = wl12xx_acx_slot(wl, SLOT_TIME_SHORT); + else + ret = wl12xx_acx_slot(wl, SLOT_TIME_LONG); + if (ret < 0) { + wl12xx_warning("Set slot time failed %d", ret); + goto out; + } + } + + if (changed & BSS_CHANGED_ERP_PREAMBLE) { + if (bss_conf->use_short_preamble) + wl12xx_acx_set_preamble(wl, ACX_PREAMBLE_SHORT); + else + wl12xx_acx_set_preamble(wl, ACX_PREAMBLE_LONG); + } + + if (changed & BSS_CHANGED_ERP_CTS_PROT) { + if (bss_conf->use_cts_prot) + ret = wl12xx_acx_cts_protect(wl, CTSPROTECT_ENABLE); + else + ret = wl12xx_acx_cts_protect(wl, CTSPROTECT_DISABLE); + if (ret < 0) { + wl12xx_warning("Set ctsprotect failed %d", ret); + goto out; + } + } + + if (changed & BSS_CHANGED_BSSID) { + memcpy(wl->bssid, bss_conf->bssid, ETH_ALEN); + + ret = wl12xx_build_null_data(wl); + if (ret < 0) + goto out; + + if (wl->bss_type != BSS_TYPE_IBSS) { + ret = wl12xx_cmd_join(wl, wl->bss_type, 5, 100, 1); + if (ret < 0) + goto out; + } + } + + if (changed & BSS_CHANGED_BEACON) { + beacon = ieee80211_beacon_get(hw, vif); + ret = wl12xx_cmd_template_set(wl, CMD_BEACON, beacon->data, + beacon->len); + + if (ret < 0) { + dev_kfree_skb(beacon); + goto out; + } + + ret = wl12xx_cmd_template_set(wl, CMD_PROBE_RESP, beacon->data, + beacon->len); + + dev_kfree_skb(beacon); + + if (ret < 0) + goto out; + + ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0); + + if (ret < 0) + goto out; + } + +out: + mutex_unlock(&wl->mutex); +} + + +/* can't be const, mac80211 writes to this */ +static struct ieee80211_rate wl12xx_rates[] = { + { .bitrate = 10, + .hw_value = 0x1, + .hw_value_short = 0x1, }, + { .bitrate = 20, + .hw_value = 0x2, + .hw_value_short = 0x2, + .flags = IEEE80211_RATE_SHORT_PREAMBLE }, + { .bitrate = 55, + .hw_value = 0x4, + .hw_value_short = 0x4, + .flags = IEEE80211_RATE_SHORT_PREAMBLE }, + { .bitrate = 110, + .hw_value = 0x20, + .hw_value_short = 0x20, + .flags = IEEE80211_RATE_SHORT_PREAMBLE }, + { .bitrate = 60, + .hw_value = 0x8, + .hw_value_short = 0x8, }, + { .bitrate = 90, + .hw_value = 0x10, + .hw_value_short = 0x10, }, + { .bitrate = 120, + .hw_value = 0x40, + .hw_value_short = 0x40, }, + { .bitrate = 180, + .hw_value = 0x80, + .hw_value_short = 0x80, }, + { .bitrate = 240, + .hw_value = 0x200, + .hw_value_short = 0x200, }, + { .bitrate = 360, + .hw_value = 0x400, + .hw_value_short = 0x400, }, + { .bitrate = 480, + .hw_value = 0x800, + .hw_value_short = 0x800, }, + { .bitrate = 540, + .hw_value = 0x1000, + .hw_value_short = 0x1000, }, +}; + +/* can't be const, mac80211 writes to this */ +static struct ieee80211_channel wl12xx_channels[] = { + { .hw_value = 1, .center_freq = 2412}, + { .hw_value = 2, .center_freq = 2417}, + { .hw_value = 3, .center_freq = 2422}, + { .hw_value = 4, .center_freq = 2427}, + { .hw_value = 5, .center_freq = 2432}, + { .hw_value = 6, .center_freq = 2437}, + { .hw_value = 7, .center_freq = 2442}, + { .hw_value = 8, .center_freq = 2447}, + { .hw_value = 9, .center_freq = 2452}, + { .hw_value = 10, .center_freq = 2457}, + { .hw_value = 11, .center_freq = 2462}, + { .hw_value = 12, .center_freq = 2467}, + { .hw_value = 13, .center_freq = 2472}, +}; + +/* can't be const, mac80211 writes to this */ +static struct ieee80211_supported_band wl12xx_band_2ghz = { + .channels = wl12xx_channels, + .n_channels = ARRAY_SIZE(wl12xx_channels), + .bitrates = wl12xx_rates, + .n_bitrates = ARRAY_SIZE(wl12xx_rates), +}; + +static const struct ieee80211_ops wl12xx_ops = { + .start = wl12xx_op_start, + .stop = wl12xx_op_stop, + .add_interface = wl12xx_op_add_interface, + .remove_interface = wl12xx_op_remove_interface, + .config = wl12xx_op_config, + .configure_filter = wl12xx_op_configure_filter, + .tx = wl12xx_op_tx, + .set_key = wl12xx_op_set_key, + .hw_scan = wl12xx_op_hw_scan, + .bss_info_changed = wl12xx_op_bss_info_changed, + .set_rts_threshold = wl12xx_op_set_rts_threshold, +}; + +static int wl12xx_register_hw(struct wl12xx *wl) +{ + int ret; + + if (wl->mac80211_registered) + return 0; + + SET_IEEE80211_PERM_ADDR(wl->hw, wl->mac_addr); + + ret = ieee80211_register_hw(wl->hw); + if (ret < 0) { + wl12xx_error("unable to register mac80211 hw: %d", ret); + return ret; + } + + wl->mac80211_registered = true; + + wl12xx_notice("loaded"); + + return 0; +} + +static int wl12xx_init_ieee80211(struct wl12xx *wl) +{ + /* The tx descriptor buffer and the TKIP space */ + wl->hw->extra_tx_headroom = sizeof(struct tx_double_buffer_desc) + + WL12XX_TKIP_IV_SPACE; + + /* unit us */ + /* FIXME: find a proper value */ + wl->hw->channel_change_time = 10000; + + wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_NOISE_DBM; + + wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + wl->hw->wiphy->max_scan_ssids = 1; + wl->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &wl12xx_band_2ghz; + + SET_IEEE80211_DEV(wl->hw, &wl->spi->dev); + + return 0; +} + +#define WL12XX_DEFAULT_CHANNEL 1 +static int __devinit wl12xx_probe(struct spi_device *spi) +{ + struct wl12xx_platform_data *pdata; + struct ieee80211_hw *hw; + struct wl12xx *wl; + int ret, i; + static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf}; + + pdata = spi->dev.platform_data; + if (!pdata) { + wl12xx_error("no platform data"); + return -ENODEV; + } + + hw = ieee80211_alloc_hw(sizeof(*wl), &wl12xx_ops); + if (!hw) { + wl12xx_error("could not alloc ieee80211_hw"); + return -ENOMEM; + } + + wl = hw->priv; + memset(wl, 0, sizeof(*wl)); + + wl->hw = hw; + dev_set_drvdata(&spi->dev, wl); + wl->spi = spi; + + wl->data_in_count = 0; + + skb_queue_head_init(&wl->tx_queue); + + INIT_WORK(&wl->tx_work, wl12xx_tx_work); + INIT_WORK(&wl->filter_work, wl12xx_filter_work); + wl->channel = WL12XX_DEFAULT_CHANNEL; + wl->scanning = false; + wl->default_key = 0; + wl->listen_int = 1; + wl->rx_counter = 0; + wl->rx_handled = 0; + wl->rx_current_buffer = 0; + wl->rx_last_id = 0; + wl->rx_config = WL12XX_DEFAULT_RX_CONFIG; + wl->rx_filter = WL12XX_DEFAULT_RX_FILTER; + wl->elp = false; + wl->psm = 0; + wl->psm_requested = false; + wl->tx_queue_stopped = false; + wl->power_level = WL12XX_DEFAULT_POWER_LEVEL; + + /* We use the default power on sleep time until we know which chip + * we're using */ + wl->chip.power_on_sleep = WL12XX_DEFAULT_POWER_ON_SLEEP; + + for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++) + wl->tx_frames[i] = NULL; + + wl->next_tx_complete = 0; + + /* + * In case our MAC address is not correctly set, + * we use a random but Nokia MAC. + */ + memcpy(wl->mac_addr, nokia_oui, 3); + get_random_bytes(wl->mac_addr + 3, 3); + + wl->state = WL12XX_STATE_OFF; + mutex_init(&wl->mutex); + + wl->tx_mgmt_frm_rate = DEFAULT_HW_GEN_TX_RATE; + wl->tx_mgmt_frm_mod = DEFAULT_HW_GEN_MODULATION_TYPE; + + /* This is the only SPI value that we need to set here, the rest + * comes from the board-peripherals file */ + spi->bits_per_word = 32; + + ret = spi_setup(spi); + if (ret < 0) { + wl12xx_error("spi_setup failed"); + goto out_free; + } + + wl->set_power = pdata->set_power; + if (!wl->set_power) { + wl12xx_error("set power function missing in platform data"); + return -ENODEV; + } + + wl->irq = spi->irq; + if (wl->irq < 0) { + wl12xx_error("irq missing in platform data"); + return -ENODEV; + } + + ret = request_irq(wl->irq, wl12xx_irq, 0, DRIVER_NAME, wl); + if (ret < 0) { + wl12xx_error("request_irq() failed: %d", ret); + goto out_free; + } + + set_irq_type(wl->irq, IRQ_TYPE_EDGE_RISING); + + disable_irq(wl->irq); + + ret = wl12xx_init_ieee80211(wl); + if (ret) + goto out_irq; + + ret = wl12xx_register_hw(wl); + if (ret) + goto out_irq; + + wl12xx_debugfs_init(wl); + + wl12xx_notice("initialized"); + + return 0; + + out_irq: + free_irq(wl->irq, wl); + + out_free: + ieee80211_free_hw(hw); + + return ret; +} + +static int __devexit wl12xx_remove(struct spi_device *spi) +{ + struct wl12xx *wl = dev_get_drvdata(&spi->dev); + + ieee80211_unregister_hw(wl->hw); + + wl12xx_debugfs_exit(wl); + + free_irq(wl->irq, wl); + kfree(wl->target_mem_map); + kfree(wl->data_path); + kfree(wl->fw); + wl->fw = NULL; + kfree(wl->nvs); + wl->nvs = NULL; + ieee80211_free_hw(wl->hw); + + return 0; +} + + +static struct spi_driver wl12xx_spi_driver = { + .driver = { + .name = "wl12xx", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + + .probe = wl12xx_probe, + .remove = __devexit_p(wl12xx_remove), +}; + +static int __init wl12xx_init(void) +{ + int ret; + + ret = spi_register_driver(&wl12xx_spi_driver); + if (ret < 0) { + wl12xx_error("failed to register spi driver: %d", ret); + goto out; + } + +out: + return ret; +} + +static void __exit wl12xx_exit(void) +{ + spi_unregister_driver(&wl12xx_spi_driver); + + wl12xx_notice("unloaded"); +} + +module_init(wl12xx_init); +module_exit(wl12xx_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kalle Valo , " + "Luciano Coelho "); diff --git a/drivers/net/wireless/wl12xx/ps.c b/drivers/net/wireless/wl12xx/ps.c new file mode 100644 index 00000000000..83a10117330 --- /dev/null +++ b/drivers/net/wireless/wl12xx/ps.c @@ -0,0 +1,151 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include "reg.h" +#include "ps.h" +#include "spi.h" + +#define WL12XX_WAKEUP_TIMEOUT 2000 + +/* Routines to toggle sleep mode while in ELP */ +void wl12xx_ps_elp_sleep(struct wl12xx *wl) +{ + if (wl->elp || !wl->psm) + return; + + wl12xx_debug(DEBUG_PSM, "chip to elp"); + + wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, ELPCTRL_SLEEP); + + wl->elp = true; +} + +int wl12xx_ps_elp_wakeup(struct wl12xx *wl) +{ + unsigned long timeout; + u32 elp_reg; + + if (!wl->elp) + return 0; + + wl12xx_debug(DEBUG_PSM, "waking up chip from elp"); + + timeout = jiffies + msecs_to_jiffies(WL12XX_WAKEUP_TIMEOUT); + + wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, ELPCTRL_WAKE_UP); + + elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR); + + /* + * FIXME: we should wait for irq from chip but, as a temporary + * solution to simplify locking, let's poll instead + */ + while (!(elp_reg & ELPCTRL_WLAN_READY)) { + if (time_after(jiffies, timeout)) { + wl12xx_error("elp wakeup timeout"); + return -ETIMEDOUT; + } + msleep(1); + elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR); + } + + wl12xx_debug(DEBUG_PSM, "wakeup time: %u ms", + jiffies_to_msecs(jiffies) - + (jiffies_to_msecs(timeout) - WL12XX_WAKEUP_TIMEOUT)); + + wl->elp = false; + + return 0; +} + +static int wl12xx_ps_set_elp(struct wl12xx *wl, bool enable) +{ + int ret; + + if (enable) { + wl12xx_debug(DEBUG_PSM, "sleep auth psm/elp"); + + /* + * FIXME: we should PSM_ELP, but because of firmware wakeup + * problems let's use only PSM_PS + */ + ret = wl12xx_acx_sleep_auth(wl, WL12XX_PSM_PS); + if (ret < 0) + return ret; + + wl12xx_ps_elp_sleep(wl); + } else { + wl12xx_debug(DEBUG_PSM, "sleep auth cam"); + + /* + * When the target is in ELP, we can only + * access the ELP control register. Thus, + * we have to wake the target up before + * changing the power authorization. + */ + + wl12xx_ps_elp_wakeup(wl); + + ret = wl12xx_acx_sleep_auth(wl, WL12XX_PSM_CAM); + if (ret < 0) + return ret; + } + + return 0; +} + +int wl12xx_ps_set_mode(struct wl12xx *wl, enum acx_ps_mode mode) +{ + int ret; + + switch (mode) { + case STATION_POWER_SAVE_MODE: + wl12xx_debug(DEBUG_PSM, "entering psm"); + ret = wl12xx_cmd_ps_mode(wl, STATION_POWER_SAVE_MODE); + if (ret < 0) + return ret; + + ret = wl12xx_ps_set_elp(wl, true); + if (ret < 0) + return ret; + + wl->psm = 1; + break; + case STATION_ACTIVE_MODE: + default: + wl12xx_debug(DEBUG_PSM, "leaving psm"); + ret = wl12xx_ps_set_elp(wl, false); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_ps_mode(wl, STATION_ACTIVE_MODE); + if (ret < 0) + return ret; + + wl->psm = 0; + break; + } + + return ret; +} + diff --git a/drivers/net/wireless/wl12xx/ps.h b/drivers/net/wireless/wl12xx/ps.h new file mode 100644 index 00000000000..5d7c5255383 --- /dev/null +++ b/drivers/net/wireless/wl12xx/ps.h @@ -0,0 +1,36 @@ +#ifndef __WL12XX_PS_H__ +#define __WL12XX_PS_H__ + +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include "wl12xx.h" +#include "acx.h" + +int wl12xx_ps_set_mode(struct wl12xx *wl, enum acx_ps_mode mode); +void wl12xx_ps_elp_sleep(struct wl12xx *wl); +int wl12xx_ps_elp_wakeup(struct wl12xx *wl); + + +#endif /* __WL12XX_PS_H__ */ diff --git a/drivers/net/wireless/wl12xx/reg.h b/drivers/net/wireless/wl12xx/reg.h new file mode 100644 index 00000000000..e421643215c --- /dev/null +++ b/drivers/net/wireless/wl12xx/reg.h @@ -0,0 +1,745 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __REG_H__ +#define __REG_H__ + +#include +#include "wl12xx.h" + +#define REGISTERS_BASE 0x00300000 +#define DRPW_BASE 0x00310000 + +#define REGISTERS_DOWN_SIZE 0x00008800 +#define REGISTERS_WORK_SIZE 0x0000b000 + +#define HW_ACCESS_ELP_CTRL_REG_ADDR 0x1FFFC + +/* ELP register commands */ +#define ELPCTRL_WAKE_UP 0x1 +#define ELPCTRL_WAKE_UP_WLAN_READY 0x5 +#define ELPCTRL_SLEEP 0x0 +/* ELP WLAN_READY bit */ +#define ELPCTRL_WLAN_READY 0x2 + +/* + * Interrupt registers. + * 64 bit interrupt sources registers ws ced. + * sme interupts were removed and new ones were added. + * Order was changed. + */ +#define FIQ_MASK (REGISTERS_BASE + 0x0400) +#define FIQ_MASK_L (REGISTERS_BASE + 0x0400) +#define FIQ_MASK_H (REGISTERS_BASE + 0x0404) +#define FIQ_MASK_SET (REGISTERS_BASE + 0x0408) +#define FIQ_MASK_SET_L (REGISTERS_BASE + 0x0408) +#define FIQ_MASK_SET_H (REGISTERS_BASE + 0x040C) +#define FIQ_MASK_CLR (REGISTERS_BASE + 0x0410) +#define FIQ_MASK_CLR_L (REGISTERS_BASE + 0x0410) +#define FIQ_MASK_CLR_H (REGISTERS_BASE + 0x0414) +#define IRQ_MASK (REGISTERS_BASE + 0x0418) +#define IRQ_MASK_L (REGISTERS_BASE + 0x0418) +#define IRQ_MASK_H (REGISTERS_BASE + 0x041C) +#define IRQ_MASK_SET (REGISTERS_BASE + 0x0420) +#define IRQ_MASK_SET_L (REGISTERS_BASE + 0x0420) +#define IRQ_MASK_SET_H (REGISTERS_BASE + 0x0424) +#define IRQ_MASK_CLR (REGISTERS_BASE + 0x0428) +#define IRQ_MASK_CLR_L (REGISTERS_BASE + 0x0428) +#define IRQ_MASK_CLR_H (REGISTERS_BASE + 0x042C) +#define ECPU_MASK (REGISTERS_BASE + 0x0448) +#define FIQ_STS_L (REGISTERS_BASE + 0x044C) +#define FIQ_STS_H (REGISTERS_BASE + 0x0450) +#define IRQ_STS_L (REGISTERS_BASE + 0x0454) +#define IRQ_STS_H (REGISTERS_BASE + 0x0458) +#define INT_STS_ND (REGISTERS_BASE + 0x0464) +#define INT_STS_RAW_L (REGISTERS_BASE + 0x0464) +#define INT_STS_RAW_H (REGISTERS_BASE + 0x0468) +#define INT_STS_CLR (REGISTERS_BASE + 0x04B4) +#define INT_STS_CLR_L (REGISTERS_BASE + 0x04B4) +#define INT_STS_CLR_H (REGISTERS_BASE + 0x04B8) +#define INT_ACK (REGISTERS_BASE + 0x046C) +#define INT_ACK_L (REGISTERS_BASE + 0x046C) +#define INT_ACK_H (REGISTERS_BASE + 0x0470) +#define INT_TRIG (REGISTERS_BASE + 0x0474) +#define INT_TRIG_L (REGISTERS_BASE + 0x0474) +#define INT_TRIG_H (REGISTERS_BASE + 0x0478) +#define HOST_STS_L (REGISTERS_BASE + 0x045C) +#define HOST_STS_H (REGISTERS_BASE + 0x0460) +#define HOST_MASK (REGISTERS_BASE + 0x0430) +#define HOST_MASK_L (REGISTERS_BASE + 0x0430) +#define HOST_MASK_H (REGISTERS_BASE + 0x0434) +#define HOST_MASK_SET (REGISTERS_BASE + 0x0438) +#define HOST_MASK_SET_L (REGISTERS_BASE + 0x0438) +#define HOST_MASK_SET_H (REGISTERS_BASE + 0x043C) +#define HOST_MASK_CLR (REGISTERS_BASE + 0x0440) +#define HOST_MASK_CLR_L (REGISTERS_BASE + 0x0440) +#define HOST_MASK_CLR_H (REGISTERS_BASE + 0x0444) + +/* Host Interrupts*/ +#define HINT_MASK (REGISTERS_BASE + 0x0494) +#define HINT_MASK_SET (REGISTERS_BASE + 0x0498) +#define HINT_MASK_CLR (REGISTERS_BASE + 0x049C) +#define HINT_STS_ND_MASKED (REGISTERS_BASE + 0x04A0) +/*1150 spec calls this HINT_STS_RAW*/ +#define HINT_STS_ND (REGISTERS_BASE + 0x04B0) +#define HINT_STS_CLR (REGISTERS_BASE + 0x04A4) +#define HINT_ACK (REGISTERS_BASE + 0x04A8) +#define HINT_TRIG (REGISTERS_BASE + 0x04AC) + +/* Device Configuration registers*/ +#define SOR_CFG (REGISTERS_BASE + 0x0800) +#define ECPU_CTRL (REGISTERS_BASE + 0x0804) +#define HI_CFG (REGISTERS_BASE + 0x0808) +#define EE_START (REGISTERS_BASE + 0x080C) + +#define CHIP_ID_B (REGISTERS_BASE + 0x5674) + +#define CHIP_ID_1251_PG10 (0x7010101) +#define CHIP_ID_1251_PG11 (0x7020101) +#define CHIP_ID_1251_PG12 (0x7030101) + +#define ENABLE (REGISTERS_BASE + 0x5450) + +/* Power Management registers */ +#define ELP_CFG_MODE (REGISTERS_BASE + 0x5804) +#define ELP_CMD (REGISTERS_BASE + 0x5808) +#define PLL_CAL_TIME (REGISTERS_BASE + 0x5810) +#define CLK_REQ_TIME (REGISTERS_BASE + 0x5814) +#define CLK_BUF_TIME (REGISTERS_BASE + 0x5818) + +#define CFG_PLL_SYNC_CNT (REGISTERS_BASE + 0x5820) + +/* Scratch Pad registers*/ +#define SCR_PAD0 (REGISTERS_BASE + 0x5608) +#define SCR_PAD1 (REGISTERS_BASE + 0x560C) +#define SCR_PAD2 (REGISTERS_BASE + 0x5610) +#define SCR_PAD3 (REGISTERS_BASE + 0x5614) +#define SCR_PAD4 (REGISTERS_BASE + 0x5618) +#define SCR_PAD4_SET (REGISTERS_BASE + 0x561C) +#define SCR_PAD4_CLR (REGISTERS_BASE + 0x5620) +#define SCR_PAD5 (REGISTERS_BASE + 0x5624) +#define SCR_PAD5_SET (REGISTERS_BASE + 0x5628) +#define SCR_PAD5_CLR (REGISTERS_BASE + 0x562C) +#define SCR_PAD6 (REGISTERS_BASE + 0x5630) +#define SCR_PAD7 (REGISTERS_BASE + 0x5634) +#define SCR_PAD8 (REGISTERS_BASE + 0x5638) +#define SCR_PAD9 (REGISTERS_BASE + 0x563C) + +/* Spare registers*/ +#define SPARE_A1 (REGISTERS_BASE + 0x0994) +#define SPARE_A2 (REGISTERS_BASE + 0x0998) +#define SPARE_A3 (REGISTERS_BASE + 0x099C) +#define SPARE_A4 (REGISTERS_BASE + 0x09A0) +#define SPARE_A5 (REGISTERS_BASE + 0x09A4) +#define SPARE_A6 (REGISTERS_BASE + 0x09A8) +#define SPARE_A7 (REGISTERS_BASE + 0x09AC) +#define SPARE_A8 (REGISTERS_BASE + 0x09B0) +#define SPARE_B1 (REGISTERS_BASE + 0x5420) +#define SPARE_B2 (REGISTERS_BASE + 0x5424) +#define SPARE_B3 (REGISTERS_BASE + 0x5428) +#define SPARE_B4 (REGISTERS_BASE + 0x542C) +#define SPARE_B5 (REGISTERS_BASE + 0x5430) +#define SPARE_B6 (REGISTERS_BASE + 0x5434) +#define SPARE_B7 (REGISTERS_BASE + 0x5438) +#define SPARE_B8 (REGISTERS_BASE + 0x543C) + +enum wl12xx_acx_int_reg { + ACX_REG_INTERRUPT_TRIG, + ACX_REG_INTERRUPT_TRIG_H, + +/*============================================= + Host Interrupt Mask Register - 32bit (RW) + ------------------------------------------ + Setting a bit in this register masks the + corresponding interrupt to the host. + 0 - RX0 - Rx first dubble buffer Data Interrupt + 1 - TXD - Tx Data Interrupt + 2 - TXXFR - Tx Transfer Interrupt + 3 - RX1 - Rx second dubble buffer Data Interrupt + 4 - RXXFR - Rx Transfer Interrupt + 5 - EVENT_A - Event Mailbox interrupt + 6 - EVENT_B - Event Mailbox interrupt + 7 - WNONHST - Wake On Host Interrupt + 8 - TRACE_A - Debug Trace interrupt + 9 - TRACE_B - Debug Trace interrupt + 10 - CDCMP - Command Complete Interrupt + 11 - + 12 - + 13 - + 14 - ICOMP - Initialization Complete Interrupt + 16 - SG SE - Soft Gemini - Sense enable interrupt + 17 - SG SD - Soft Gemini - Sense disable interrupt + 18 - - + 19 - - + 20 - - + 21- - + Default: 0x0001 +*==============================================*/ + ACX_REG_INTERRUPT_MASK, + +/*============================================= + Host Interrupt Mask Set 16bit, (Write only) + ------------------------------------------ + Setting a bit in this register sets + the corresponding bin in ACX_HINT_MASK register + without effecting the mask + state of other bits (0 = no effect). +==============================================*/ + ACX_REG_HINT_MASK_SET, + +/*============================================= + Host Interrupt Mask Clear 16bit,(Write only) + ------------------------------------------ + Setting a bit in this register clears + the corresponding bin in ACX_HINT_MASK register + without effecting the mask + state of other bits (0 = no effect). +=============================================*/ + ACX_REG_HINT_MASK_CLR, + +/*============================================= + Host Interrupt Status Nondestructive Read + 16bit,(Read only) + ------------------------------------------ + The host can read this register to determine + which interrupts are active. + Reading this register doesn't + effect its content. +=============================================*/ + ACX_REG_INTERRUPT_NO_CLEAR, + +/*============================================= + Host Interrupt Status Clear on Read Register + 16bit,(Read only) + ------------------------------------------ + The host can read this register to determine + which interrupts are active. + Reading this register clears it, + thus making all interrupts inactive. +==============================================*/ + ACX_REG_INTERRUPT_CLEAR, + +/*============================================= + Host Interrupt Acknowledge Register + 16bit,(Write only) + ------------------------------------------ + The host can set individual bits in this + register to clear (acknowledge) the corresp. + interrupt status bits in the HINT_STS_CLR and + HINT_STS_ND registers, thus making the + assotiated interrupt inactive. (0-no effect) +==============================================*/ + ACX_REG_INTERRUPT_ACK, + +/*=============================================== + Host Software Reset - 32bit RW + ------------------------------------------ + [31:1] Reserved + 0 SOFT_RESET Soft Reset - When this bit is set, + it holds the Wlan hardware in a soft reset state. + This reset disables all MAC and baseband processor + clocks except the CardBus/PCI interface clock. + It also initializes all MAC state machines except + the host interface. It does not reload the + contents of the EEPROM. When this bit is cleared + (not self-clearing), the Wlan hardware + exits the software reset state. +===============================================*/ + ACX_REG_SLV_SOFT_RESET, + +/*=============================================== + EEPROM Burst Read Start - 32bit RW + ------------------------------------------ + [31:1] Reserved + 0 ACX_EE_START - EEPROM Burst Read Start 0 + Setting this bit starts a burst read from + the external EEPROM. + If this bit is set (after reset) before an EEPROM read/write, + the burst read starts at EEPROM address 0. + Otherwise, it starts at the address + following the address of the previous access. + TheWlan hardware hardware clears this bit automatically. + + Default: 0x00000000 +*================================================*/ + ACX_REG_EE_START, + +/* Embedded ARM CPU Control */ + +/*=============================================== + Halt eCPU - 32bit RW + ------------------------------------------ + 0 HALT_ECPU Halt Embedded CPU - This bit is the + compliment of bit 1 (MDATA2) in the SOR_CFG register. + During a hardware reset, this bit holds + the inverse of MDATA2. + When downloading firmware from the host, + set this bit (pull down MDATA2). + The host clears this bit after downloading the firmware into + zero-wait-state SSRAM. + When loading firmware from Flash, clear this bit (pull up MDATA2) + so that the eCPU can run the bootloader code in Flash + HALT_ECPU eCPU State + -------------------- + 1 halt eCPU + 0 enable eCPU + ===============================================*/ + ACX_REG_ECPU_CONTROL, + + ACX_REG_TABLE_LEN +}; + +#define ACX_SLV_SOFT_RESET_BIT BIT(1) +#define ACX_REG_EEPROM_START_BIT BIT(1) + +/* Command/Information Mailbox Pointers */ + +/*=============================================== + Command Mailbox Pointer - 32bit RW + ------------------------------------------ + This register holds the start address of + the command mailbox located in the Wlan hardware memory. + The host must read this pointer after a reset to + find the location of the command mailbox. + The Wlan hardware initializes the command mailbox + pointer with the default address of the command mailbox. + The command mailbox pointer is not valid until after + the host receives the Init Complete interrupt from + the Wlan hardware. + ===============================================*/ +#define REG_COMMAND_MAILBOX_PTR (SCR_PAD0) + +/*=============================================== + Information Mailbox Pointer - 32bit RW + ------------------------------------------ + This register holds the start address of + the information mailbox located in the Wlan hardware memory. + The host must read this pointer after a reset to find + the location of the information mailbox. + The Wlan hardware initializes the information mailbox pointer + with the default address of the information mailbox. + The information mailbox pointer is not valid + until after the host receives the Init Complete interrupt from + the Wlan hardware. + ===============================================*/ +#define REG_EVENT_MAILBOX_PTR (SCR_PAD1) + + +/* Misc */ + +#define REG_ENABLE_TX_RX (ENABLE) +/* + * Rx configuration (filter) information element + * --------------------------------------------- + */ +#define REG_RX_CONFIG (RX_CFG) +#define REG_RX_FILTER (RX_FILTER_CFG) + + +#define RX_CFG_ENABLE_PHY_HEADER_PLCP 0x0002 + +/* promiscuous - receives all valid frames */ +#define RX_CFG_PROMISCUOUS 0x0008 + +/* receives frames from any BSSID */ +#define RX_CFG_BSSID 0x0020 + +/* receives frames destined to any MAC address */ +#define RX_CFG_MAC 0x0010 + +#define RX_CFG_ENABLE_ONLY_MY_DEST_MAC 0x0010 +#define RX_CFG_ENABLE_ANY_DEST_MAC 0x0000 +#define RX_CFG_ENABLE_ONLY_MY_BSSID 0x0020 +#define RX_CFG_ENABLE_ANY_BSSID 0x0000 + +/* discards all broadcast frames */ +#define RX_CFG_DISABLE_BCAST 0x0200 + +#define RX_CFG_ENABLE_ONLY_MY_SSID 0x0400 +#define RX_CFG_ENABLE_RX_CMPLT_FCS_ERROR 0x0800 +#define RX_CFG_COPY_RX_STATUS 0x2000 +#define RX_CFG_TSF 0x10000 + +#define RX_CONFIG_OPTION_ANY_DST_MY_BSS (RX_CFG_ENABLE_ANY_DEST_MAC | \ + RX_CFG_ENABLE_ONLY_MY_BSSID) + +#define RX_CONFIG_OPTION_MY_DST_ANY_BSS (RX_CFG_ENABLE_ONLY_MY_DEST_MAC\ + | RX_CFG_ENABLE_ANY_BSSID) + +#define RX_CONFIG_OPTION_ANY_DST_ANY_BSS (RX_CFG_ENABLE_ANY_DEST_MAC | \ + RX_CFG_ENABLE_ANY_BSSID) + +#define RX_CONFIG_OPTION_MY_DST_MY_BSS (RX_CFG_ENABLE_ONLY_MY_DEST_MAC\ + | RX_CFG_ENABLE_ONLY_MY_BSSID) + +#define RX_CONFIG_OPTION_FOR_SCAN (RX_CFG_ENABLE_PHY_HEADER_PLCP \ + | RX_CFG_ENABLE_RX_CMPLT_FCS_ERROR \ + | RX_CFG_COPY_RX_STATUS | RX_CFG_TSF) + +#define RX_CONFIG_OPTION_FOR_MEASUREMENT (RX_CFG_ENABLE_ANY_DEST_MAC) + +#define RX_CONFIG_OPTION_FOR_JOIN (RX_CFG_ENABLE_ONLY_MY_BSSID | \ + RX_CFG_ENABLE_ONLY_MY_DEST_MAC) + +#define RX_CONFIG_OPTION_FOR_IBSS_JOIN (RX_CFG_ENABLE_ONLY_MY_SSID | \ + RX_CFG_ENABLE_ONLY_MY_DEST_MAC) + +#define RX_FILTER_OPTION_DEF (CFG_RX_MGMT_EN | CFG_RX_DATA_EN\ + | CFG_RX_CTL_EN | CFG_RX_BCN_EN\ + | CFG_RX_AUTH_EN | CFG_RX_ASSOC_EN) + +#define RX_FILTER_OPTION_FILTER_ALL 0 + +#define RX_FILTER_OPTION_DEF_PRSP_BCN (CFG_RX_PRSP_EN | CFG_RX_MGMT_EN\ + | CFG_RX_RCTS_ACK | CFG_RX_BCN_EN) + +#define RX_FILTER_OPTION_JOIN (CFG_RX_MGMT_EN | CFG_RX_DATA_EN\ + | CFG_RX_BCN_EN | CFG_RX_AUTH_EN\ + | CFG_RX_ASSOC_EN | CFG_RX_RCTS_ACK\ + | CFG_RX_PRSP_EN) + + +/*=============================================== + Phy regs + ===============================================*/ +#define ACX_PHY_ADDR_REG SBB_ADDR +#define ACX_PHY_DATA_REG SBB_DATA +#define ACX_PHY_CTRL_REG SBB_CTL +#define ACX_PHY_REG_WR_MASK 0x00000001ul +#define ACX_PHY_REG_RD_MASK 0x00000002ul + + +/*=============================================== + EEPROM Read/Write Request 32bit RW + ------------------------------------------ + 1 EE_READ - EEPROM Read Request 1 - Setting this bit + loads a single byte of data into the EE_DATA + register from the EEPROM location specified in + the EE_ADDR register. + The Wlan hardware hardware clears this bit automatically. + EE_DATA is valid when this bit is cleared. + + 0 EE_WRITE - EEPROM Write Request - Setting this bit + writes a single byte of data from the EE_DATA register into the + EEPROM location specified in the EE_ADDR register. + The Wlan hardware hardware clears this bit automatically. +*===============================================*/ +#define ACX_EE_CTL_REG EE_CTL +#define EE_WRITE 0x00000001ul +#define EE_READ 0x00000002ul + +/*=============================================== + EEPROM Address - 32bit RW + ------------------------------------------ + This register specifies the address + within the EEPROM from/to which to read/write data. + ===============================================*/ +#define ACX_EE_ADDR_REG EE_ADDR + +/*=============================================== + EEPROM Data - 32bit RW + ------------------------------------------ + This register either holds the read 8 bits of + data from the EEPROM or the write data + to be written to the EEPROM. + ===============================================*/ +#define ACX_EE_DATA_REG EE_DATA + +/*=============================================== + EEPROM Base Address - 32bit RW + ------------------------------------------ + This register holds the upper nine bits + [23:15] of the 24-bit Wlan hardware memory + address for burst reads from EEPROM accesses. + The EEPROM provides the lower 15 bits of this address. + The MSB of the address from the EEPROM is ignored. + ===============================================*/ +#define ACX_EE_CFG EE_CFG + +/*=============================================== + GPIO Output Values -32bit, RW + ------------------------------------------ + [31:16] Reserved + [15: 0] Specify the output values (at the output driver inputs) for + GPIO[15:0], respectively. + ===============================================*/ +#define ACX_GPIO_OUT_REG GPIO_OUT +#define ACX_MAX_GPIO_LINES 15 + +/*=============================================== + Contention window -32bit, RW + ------------------------------------------ + [31:26] Reserved + [25:16] Max (0x3ff) + [15:07] Reserved + [06:00] Current contention window value - default is 0x1F + ===============================================*/ +#define ACX_CONT_WIND_CFG_REG CONT_WIND_CFG +#define ACX_CONT_WIND_MIN_MASK 0x0000007f +#define ACX_CONT_WIND_MAX 0x03ff0000 + +/* + * Indirect slave register/memory registers + * ---------------------------------------- + */ +#define HW_SLAVE_REG_ADDR_REG 0x00000004 +#define HW_SLAVE_REG_DATA_REG 0x00000008 +#define HW_SLAVE_REG_CTRL_REG 0x0000000c + +#define SLAVE_AUTO_INC 0x00010000 +#define SLAVE_NO_AUTO_INC 0x00000000 +#define SLAVE_HOST_LITTLE_ENDIAN 0x00000000 + +#define HW_SLAVE_MEM_ADDR_REG SLV_MEM_ADDR +#define HW_SLAVE_MEM_DATA_REG SLV_MEM_DATA +#define HW_SLAVE_MEM_CTRL_REG SLV_MEM_CTL +#define HW_SLAVE_MEM_ENDIAN_REG SLV_END_CTL + +#define HW_FUNC_EVENT_INT_EN 0x8000 +#define HW_FUNC_EVENT_MASK_REG 0x00000034 + +#define ACX_MAC_TIMESTAMP_REG (MAC_TIMESTAMP) + +/*=============================================== + HI_CFG Interface Configuration Register Values + ------------------------------------------ + ===============================================*/ +#define HI_CFG_UART_ENABLE 0x00000004 +#define HI_CFG_RST232_ENABLE 0x00000008 +#define HI_CFG_CLOCK_REQ_SELECT 0x00000010 +#define HI_CFG_HOST_INT_ENABLE 0x00000020 +#define HI_CFG_VLYNQ_OUTPUT_ENABLE 0x00000040 +#define HI_CFG_HOST_INT_ACTIVE_LOW 0x00000080 +#define HI_CFG_UART_TX_OUT_GPIO_15 0x00000100 +#define HI_CFG_UART_TX_OUT_GPIO_14 0x00000200 +#define HI_CFG_UART_TX_OUT_GPIO_7 0x00000400 + +/* + * NOTE: USE_ACTIVE_HIGH compilation flag should be defined in makefile + * for platforms using active high interrupt level + */ +#ifdef USE_ACTIVE_HIGH +#define HI_CFG_DEF_VAL \ + (HI_CFG_UART_ENABLE | \ + HI_CFG_RST232_ENABLE | \ + HI_CFG_CLOCK_REQ_SELECT | \ + HI_CFG_HOST_INT_ENABLE) +#else +#define HI_CFG_DEF_VAL \ + (HI_CFG_UART_ENABLE | \ + HI_CFG_RST232_ENABLE | \ + HI_CFG_CLOCK_REQ_SELECT | \ + HI_CFG_HOST_INT_ENABLE) + +#endif + +#define REF_FREQ_19_2 0 +#define REF_FREQ_26_0 1 +#define REF_FREQ_38_4 2 +#define REF_FREQ_40_0 3 +#define REF_FREQ_33_6 4 +#define REF_FREQ_NUM 5 + +#define LUT_PARAM_INTEGER_DIVIDER 0 +#define LUT_PARAM_FRACTIONAL_DIVIDER 1 +#define LUT_PARAM_ATTN_BB 2 +#define LUT_PARAM_ALPHA_BB 3 +#define LUT_PARAM_STOP_TIME_BB 4 +#define LUT_PARAM_BB_PLL_LOOP_FILTER 5 +#define LUT_PARAM_NUM 6 + +#define ACX_EEPROMLESS_IND_REG (SCR_PAD4) +#define USE_EEPROM 0 +#define SOFT_RESET_MAX_TIME 1000000 +#define SOFT_RESET_STALL_TIME 1000 +#define NVS_DATA_BUNDARY_ALIGNMENT 4 + + +/* Firmware image load chunk size */ +#define CHUNK_SIZE 512 + +/* Firmware image header size */ +#define FW_HDR_SIZE 8 + +#define ECPU_CONTROL_HALT 0x00000101 + + +/****************************************************************************** + + CHANNELS, BAND & REG DOMAINS definitions + +******************************************************************************/ + + +enum { + RADIO_BAND_2_4GHZ = 0, /* 2.4 Ghz band */ + RADIO_BAND_5GHZ = 1, /* 5 Ghz band */ + RADIO_BAND_JAPAN_4_9_GHZ = 2, + DEFAULT_BAND = RADIO_BAND_2_4GHZ, + INVALID_BAND = 0xFE, + MAX_RADIO_BANDS = 0xFF +}; + +enum { + NO_RATE = 0, + RATE_1MBPS = 0x0A, + RATE_2MBPS = 0x14, + RATE_5_5MBPS = 0x37, + RATE_6MBPS = 0x0B, + RATE_9MBPS = 0x0F, + RATE_11MBPS = 0x6E, + RATE_12MBPS = 0x0A, + RATE_18MBPS = 0x0E, + RATE_22MBPS = 0xDC, + RATE_24MBPS = 0x09, + RATE_36MBPS = 0x0D, + RATE_48MBPS = 0x08, + RATE_54MBPS = 0x0C +}; + +enum { + RATE_INDEX_1MBPS = 0, + RATE_INDEX_2MBPS = 1, + RATE_INDEX_5_5MBPS = 2, + RATE_INDEX_6MBPS = 3, + RATE_INDEX_9MBPS = 4, + RATE_INDEX_11MBPS = 5, + RATE_INDEX_12MBPS = 6, + RATE_INDEX_18MBPS = 7, + RATE_INDEX_22MBPS = 8, + RATE_INDEX_24MBPS = 9, + RATE_INDEX_36MBPS = 10, + RATE_INDEX_48MBPS = 11, + RATE_INDEX_54MBPS = 12, + RATE_INDEX_MAX = RATE_INDEX_54MBPS, + MAX_RATE_INDEX, + INVALID_RATE_INDEX = MAX_RATE_INDEX, + RATE_INDEX_ENUM_MAX_SIZE = 0x7FFFFFFF +}; + +enum { + RATE_MASK_1MBPS = 0x1, + RATE_MASK_2MBPS = 0x2, + RATE_MASK_5_5MBPS = 0x4, + RATE_MASK_11MBPS = 0x20, +}; + +#define SHORT_PREAMBLE_BIT BIT(0) /* CCK or Barker depending on the rate */ +#define OFDM_RATE_BIT BIT(6) +#define PBCC_RATE_BIT BIT(7) + +enum { + CCK_LONG = 0, + CCK_SHORT = SHORT_PREAMBLE_BIT, + PBCC_LONG = PBCC_RATE_BIT, + PBCC_SHORT = PBCC_RATE_BIT | SHORT_PREAMBLE_BIT, + OFDM = OFDM_RATE_BIT +}; + +/****************************************************************************** + +Transmit-Descriptor RATE-SET field definitions... + +Define a new "Rate-Set" for TX path that incorporates the +Rate & Modulation info into a single 16-bit field. + +TxdRateSet_t: +b15 - Indicates Preamble type (1=SHORT, 0=LONG). + Notes: + Must be LONG (0) for 1Mbps rate. + Does not apply (set to 0) for RevG-OFDM rates. +b14 - Indicates PBCC encoding (1=PBCC, 0=not). + Notes: + Does not apply (set to 0) for rates 1 and 2 Mbps. + Does not apply (set to 0) for RevG-OFDM rates. +b13 - Unused (set to 0). +b12-b0 - Supported Rate indicator bits as defined below. + +******************************************************************************/ + + +#define TNETW1251_CHIP_ID_PG1_0 0x07010101 +#define TNETW1251_CHIP_ID_PG1_1 0x07020101 +#define TNETW1251_CHIP_ID_PG1_2 0x07030101 + +/************************************************************************* + + Interrupt Trigger Register (Host -> WiLink) + +**************************************************************************/ + +/* Hardware to Embedded CPU Interrupts - first 32-bit register set */ + +/* + * Host Command Interrupt. Setting this bit masks + * the interrupt that the host issues to inform + * the FW that it has sent a command + * to the Wlan hardware Command Mailbox. + */ +#define INTR_TRIG_CMD BIT(0) + +/* + * Host Event Acknowlegde Interrupt. The host + * sets this bit to acknowledge that it received + * the unsolicited information from the event + * mailbox. + */ +#define INTR_TRIG_EVENT_ACK BIT(1) + +/* + * The host sets this bit to inform the Wlan + * FW that a TX packet is in the XFER + * Buffer #0. + */ +#define INTR_TRIG_TX_PROC0 BIT(2) + +/* + * The host sets this bit to inform the FW + * that it read a packet from RX XFER + * Buffer #0. + */ +#define INTR_TRIG_RX_PROC0 BIT(3) + +#define INTR_TRIG_DEBUG_ACK BIT(4) + +#define INTR_TRIG_STATE_CHANGED BIT(5) + + +/* Hardware to Embedded CPU Interrupts - second 32-bit register set */ + +/* + * The host sets this bit to inform the FW + * that it read a packet from RX XFER + * Buffer #1. + */ +#define INTR_TRIG_RX_PROC1 BIT(17) + +/* + * The host sets this bit to inform the Wlan + * hardware that a TX packet is in the XFER + * Buffer #1. + */ +#define INTR_TRIG_TX_PROC1 BIT(18) + +#endif diff --git a/drivers/net/wireless/wl12xx/rx.c b/drivers/net/wireless/wl12xx/rx.c new file mode 100644 index 00000000000..981ea259eb8 --- /dev/null +++ b/drivers/net/wireless/wl12xx/rx.c @@ -0,0 +1,208 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include + +#include "wl12xx.h" +#include "reg.h" +#include "spi.h" +#include "rx.h" + +static void wl12xx_rx_header(struct wl12xx *wl, + struct wl12xx_rx_descriptor *desc) +{ + u32 rx_packet_ring_addr; + + rx_packet_ring_addr = wl->data_path->rx_packet_ring_addr; + if (wl->rx_current_buffer) + rx_packet_ring_addr += wl->data_path->rx_packet_ring_chunk_size; + + wl12xx_spi_mem_read(wl, rx_packet_ring_addr, desc, + sizeof(struct wl12xx_rx_descriptor)); +} + +static void wl12xx_rx_status(struct wl12xx *wl, + struct wl12xx_rx_descriptor *desc, + struct ieee80211_rx_status *status, + u8 beacon) +{ + memset(status, 0, sizeof(struct ieee80211_rx_status)); + + status->band = IEEE80211_BAND_2GHZ; + status->mactime = desc->timestamp; + + /* + * The rx status timestamp is a 32 bits value while the TSF is a + * 64 bits one. + * For IBSS merging, TSF is mandatory, so we have to get it + * somehow, so we ask for ACX_TSF_INFO. + * That could be moved to the get_tsf() hook, but unfortunately, + * this one must be atomic, while our SPI routines can sleep. + */ + if ((wl->bss_type == BSS_TYPE_IBSS) && beacon) { + u64 mactime; + int ret; + struct wl12xx_command cmd; + struct acx_tsf_info *tsf_info; + + memset(&cmd, 0, sizeof(cmd)); + + ret = wl12xx_cmd_interrogate(wl, ACX_TSF_INFO, + sizeof(struct acx_tsf_info), + &cmd); + if (ret < 0) { + wl12xx_warning("ACX_FW_REV interrogate failed"); + return; + } + + tsf_info = (struct acx_tsf_info *)&(cmd.parameters); + + mactime = tsf_info->current_tsf_lsb | + (tsf_info->current_tsf_msb << 31); + + status->mactime = mactime; + } + + status->signal = desc->rssi; + status->qual = (desc->rssi - WL12XX_RX_MIN_RSSI) * 100 / + (WL12XX_RX_MAX_RSSI - WL12XX_RX_MIN_RSSI); + status->qual = min(status->qual, 100); + status->qual = max(status->qual, 0); + + /* + * FIXME: guessing that snr needs to be divided by two, otherwise + * the values don't make any sense + */ + status->noise = desc->rssi - desc->snr / 2; + + status->freq = ieee80211_channel_to_frequency(desc->channel); + + status->flag |= RX_FLAG_TSFT; + + if (desc->flags & RX_DESC_ENCRYPTION_MASK) { + status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED; + + if (likely(!(desc->flags & RX_DESC_DECRYPT_FAIL))) + status->flag |= RX_FLAG_DECRYPTED; + + if (unlikely(desc->flags & RX_DESC_MIC_FAIL)) + status->flag |= RX_FLAG_MMIC_ERROR; + } + + if (unlikely(!(desc->flags & RX_DESC_VALID_FCS))) + status->flag |= RX_FLAG_FAILED_FCS_CRC; + + + /* FIXME: set status->rate_idx */ +} + +static void wl12xx_rx_body(struct wl12xx *wl, + struct wl12xx_rx_descriptor *desc) +{ + struct sk_buff *skb; + struct ieee80211_rx_status status; + u8 *rx_buffer, beacon = 0; + u16 length, *fc; + u32 curr_id, last_id_inc, rx_packet_ring_addr; + + length = WL12XX_RX_ALIGN(desc->length - PLCP_HEADER_LENGTH); + curr_id = (desc->flags & RX_DESC_SEQNUM_MASK) >> RX_DESC_PACKETID_SHIFT; + last_id_inc = (wl->rx_last_id + 1) % (RX_MAX_PACKET_ID + 1); + + if (last_id_inc != curr_id) { + wl12xx_warning("curr ID:%d, last ID inc:%d", + curr_id, last_id_inc); + wl->rx_last_id = curr_id; + } else { + wl->rx_last_id = last_id_inc; + } + + rx_packet_ring_addr = wl->data_path->rx_packet_ring_addr + + sizeof(struct wl12xx_rx_descriptor) + 20; + if (wl->rx_current_buffer) + rx_packet_ring_addr += wl->data_path->rx_packet_ring_chunk_size; + + skb = dev_alloc_skb(length); + if (!skb) { + wl12xx_error("Couldn't allocate RX frame"); + return; + } + + rx_buffer = skb_put(skb, length); + wl12xx_spi_mem_read(wl, rx_packet_ring_addr, rx_buffer, length); + + /* The actual lenght doesn't include the target's alignment */ + skb->len = desc->length - PLCP_HEADER_LENGTH; + + fc = (u16 *)skb->data; + + if ((*fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) + beacon = 1; + + wl12xx_rx_status(wl, desc, &status, beacon); + + wl12xx_debug(DEBUG_RX, "rx skb 0x%p: %d B %s", skb, skb->len, + beacon ? "beacon" : ""); + + ieee80211_rx(wl->hw, skb, &status); +} + +static void wl12xx_rx_ack(struct wl12xx *wl) +{ + u32 data, addr; + + if (wl->rx_current_buffer) { + addr = ACX_REG_INTERRUPT_TRIG_H; + data = INTR_TRIG_RX_PROC1; + } else { + addr = ACX_REG_INTERRUPT_TRIG; + data = INTR_TRIG_RX_PROC0; + } + + wl12xx_reg_write32(wl, addr, data); + + /* Toggle buffer ring */ + wl->rx_current_buffer = !wl->rx_current_buffer; +} + + +void wl12xx_rx(struct wl12xx *wl) +{ + struct wl12xx_rx_descriptor rx_desc; + + if (wl->state != WL12XX_STATE_ON) + return; + + /* We first read the frame's header */ + wl12xx_rx_header(wl, &rx_desc); + + /* Now we can read the body */ + wl12xx_rx_body(wl, &rx_desc); + + /* Finally, we need to ACK the RX */ + wl12xx_rx_ack(wl); + + return; +} diff --git a/drivers/net/wireless/wl12xx/rx.h b/drivers/net/wireless/wl12xx/rx.h new file mode 100644 index 00000000000..8a23fdea501 --- /dev/null +++ b/drivers/net/wireless/wl12xx/rx.h @@ -0,0 +1,122 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_RX_H__ +#define __WL12XX_RX_H__ + +#include + +/* + * RX PATH + * + * The Rx path uses a double buffer and an rx_contro structure, each located + * at a fixed address in the device memory. The host keeps track of which + * buffer is available and alternates between them on a per packet basis. + * The size of each of the two buffers is large enough to hold the longest + * 802.3 packet. + * The RX path goes like that: + * 1) The target generates an interrupt each time a new packet is received. + * There are 2 RX interrupts, one for each buffer. + * 2) The host reads the received packet from one of the double buffers. + * 3) The host triggers a target interrupt. + * 4) The target prepares the next RX packet. + */ + +#define WL12XX_RX_MAX_RSSI -30 +#define WL12XX_RX_MIN_RSSI -95 + +#define WL12XX_RX_ALIGN_TO 4 +#define WL12XX_RX_ALIGN(len) (((len) + WL12XX_RX_ALIGN_TO - 1) & \ + ~(WL12XX_RX_ALIGN_TO - 1)) + +#define SHORT_PREAMBLE_BIT BIT(0) +#define OFDM_RATE_BIT BIT(6) +#define PBCC_RATE_BIT BIT(7) + +#define PLCP_HEADER_LENGTH 8 +#define RX_DESC_PACKETID_SHIFT 11 +#define RX_MAX_PACKET_ID 3 + +#define RX_DESC_VALID_FCS 0x0001 +#define RX_DESC_MATCH_RXADDR1 0x0002 +#define RX_DESC_MCAST 0x0004 +#define RX_DESC_STAINTIM 0x0008 +#define RX_DESC_VIRTUAL_BM 0x0010 +#define RX_DESC_BCAST 0x0020 +#define RX_DESC_MATCH_SSID 0x0040 +#define RX_DESC_MATCH_BSSID 0x0080 +#define RX_DESC_ENCRYPTION_MASK 0x0300 +#define RX_DESC_MEASURMENT 0x0400 +#define RX_DESC_SEQNUM_MASK 0x1800 +#define RX_DESC_MIC_FAIL 0x2000 +#define RX_DESC_DECRYPT_FAIL 0x4000 + +struct wl12xx_rx_descriptor { + u32 timestamp; /* In microseconds */ + u16 length; /* Paylod length, including headers */ + u16 flags; + + /* + * 0 - 802.11 + * 1 - 802.3 + * 2 - IP + * 3 - Raw Codec + */ + u8 type; + + /* + * Recevied Rate: + * 0x0A - 1MBPS + * 0x14 - 2MBPS + * 0x37 - 5_5MBPS + * 0x0B - 6MBPS + * 0x0F - 9MBPS + * 0x6E - 11MBPS + * 0x0A - 12MBPS + * 0x0E - 18MBPS + * 0xDC - 22MBPS + * 0x09 - 24MBPS + * 0x0D - 36MBPS + * 0x08 - 48MBPS + * 0x0C - 54MBPS + */ + u8 rate; + + u8 mod_pre; /* Modulation and preamble */ + u8 channel; + + /* + * 0 - 2.4 Ghz + * 1 - 5 Ghz + */ + u8 band; + + s8 rssi; /* in dB */ + u8 rcpi; /* in dB */ + u8 snr; /* in dB */ +} __attribute__ ((packed)); + +void wl12xx_rx(struct wl12xx *wl); + +#endif diff --git a/drivers/net/wireless/wl12xx/spi.c b/drivers/net/wireless/wl12xx/spi.c new file mode 100644 index 00000000000..abdf171a47e --- /dev/null +++ b/drivers/net/wireless/wl12xx/spi.c @@ -0,0 +1,358 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include + +#include "wl12xx.h" +#include "wl12xx_80211.h" +#include "reg.h" +#include "spi.h" +#include "ps.h" + +static int wl12xx_translate_reg_addr(struct wl12xx *wl, int addr) +{ + /* If the address is lower than REGISTERS_BASE, it means that this is + * a chip-specific register address, so look it up in the registers + * table */ + if (addr < REGISTERS_BASE) { + /* Make sure we don't go over the table */ + if (addr >= ACX_REG_TABLE_LEN) { + wl12xx_error("address out of range (%d)", addr); + return -EINVAL; + } + addr = wl->chip.acx_reg_table[addr]; + } + + return addr - wl->physical_reg_addr + wl->virtual_reg_addr; +} + +static int wl12xx_translate_mem_addr(struct wl12xx *wl, int addr) +{ + return addr - wl->physical_mem_addr + wl->virtual_mem_addr; +} + + +void wl12xx_spi_reset(struct wl12xx *wl) +{ + u8 *cmd; + struct spi_transfer t; + struct spi_message m; + + cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL); + if (!cmd) { + wl12xx_error("could not allocate cmd for spi reset"); + return; + } + + memset(&t, 0, sizeof(t)); + spi_message_init(&m); + + memset(cmd, 0xff, WSPI_INIT_CMD_LEN); + + t.tx_buf = cmd; + t.len = WSPI_INIT_CMD_LEN; + spi_message_add_tail(&t, &m); + + spi_sync(wl->spi, &m); + + wl12xx_dump(DEBUG_SPI, "spi reset -> ", cmd, WSPI_INIT_CMD_LEN); +} + +void wl12xx_spi_init(struct wl12xx *wl) +{ + u8 crc[WSPI_INIT_CMD_CRC_LEN], *cmd; + struct spi_transfer t; + struct spi_message m; + + cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL); + if (!cmd) { + wl12xx_error("could not allocate cmd for spi init"); + return; + } + + memset(crc, 0, sizeof(crc)); + memset(&t, 0, sizeof(t)); + spi_message_init(&m); + + /* + * Set WSPI_INIT_COMMAND + * the data is being send from the MSB to LSB + */ + cmd[2] = 0xff; + cmd[3] = 0xff; + cmd[1] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX; + cmd[0] = 0; + cmd[7] = 0; + cmd[6] |= HW_ACCESS_WSPI_INIT_CMD_MASK << 3; + cmd[6] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN; + + if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0) + cmd[5] |= WSPI_INIT_CMD_DIS_FIXEDBUSY; + else + cmd[5] |= WSPI_INIT_CMD_EN_FIXEDBUSY; + + cmd[5] |= WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS + | WSPI_INIT_CMD_WSPI | WSPI_INIT_CMD_WS; + + crc[0] = cmd[1]; + crc[1] = cmd[0]; + crc[2] = cmd[7]; + crc[3] = cmd[6]; + crc[4] = cmd[5]; + + cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1; + cmd[4] |= WSPI_INIT_CMD_END; + + t.tx_buf = cmd; + t.len = WSPI_INIT_CMD_LEN; + spi_message_add_tail(&t, &m); + + spi_sync(wl->spi, &m); + + wl12xx_dump(DEBUG_SPI, "spi init -> ", cmd, WSPI_INIT_CMD_LEN); +} + +/* Set the SPI partitions to access the chip addresses + * + * There are two VIRTUAL (SPI) partitions (the memory partition and the + * registers partition), which are mapped to two different areas of the + * PHYSICAL (hardware) memory. This function also makes other checks to + * ensure that the partitions are not overlapping. In the diagram below, the + * memory partition comes before the register partition, but the opposite is + * also supported. + * + * PHYSICAL address + * space + * + * | | + * ...+----+--> mem_start + * VIRTUAL address ... | | + * space ... | | [PART_0] + * ... | | + * 0x00000000 <--+----+... ...+----+--> mem_start + mem_size + * | | ... | | + * |MEM | ... | | + * | | ... | | + * part_size <--+----+... | | {unused area) + * | | ... | | + * |REG | ... | | + * part_size | | ... | | + * + <--+----+... ...+----+--> reg_start + * reg_size ... | | + * ... | | [PART_1] + * ... | | + * ...+----+--> reg_start + reg_size + * | | + * + */ +void wl12xx_set_partition(struct wl12xx *wl, + u32 mem_start, u32 mem_size, + u32 reg_start, u32 reg_size) +{ + u8 tx_buf[sizeof(u32) + 2 * sizeof(struct wl12xx_partition)]; + struct wl12xx_partition *partition; + struct spi_transfer t; + struct spi_message m; + u32 *cmd; + size_t len; + int addr; + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + memset(tx_buf, 0, sizeof(tx_buf)); + + cmd = (u32 *) tx_buf; + partition = (struct wl12xx_partition *) (tx_buf + sizeof(u32)); + addr = HW_ACCESS_PART0_SIZE_ADDR; + len = 2 * sizeof(struct wl12xx_partition); + + *cmd |= WSPI_CMD_WRITE; + *cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH; + *cmd |= addr & WSPI_CMD_BYTE_ADDR; + + wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X", + mem_start, mem_size); + wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X", + reg_start, reg_size); + + /* Make sure that the two partitions together don't exceed the + * address range */ + if ((mem_size + reg_size) > HW_ACCESS_MEMORY_MAX_RANGE) { + wl12xx_debug(DEBUG_SPI, "Total size exceeds maximum virtual" + " address range. Truncating partition[0]."); + mem_size = HW_ACCESS_MEMORY_MAX_RANGE - reg_size; + wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X", + mem_start, mem_size); + wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X", + reg_start, reg_size); + } + + if ((mem_start < reg_start) && + ((mem_start + mem_size) > reg_start)) { + /* Guarantee that the memory partition doesn't overlap the + * registers partition */ + wl12xx_debug(DEBUG_SPI, "End of partition[0] is " + "overlapping partition[1]. Adjusted."); + mem_size = reg_start - mem_start; + wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X", + mem_start, mem_size); + wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X", + reg_start, reg_size); + } else if ((reg_start < mem_start) && + ((reg_start + reg_size) > mem_start)) { + /* Guarantee that the register partition doesn't overlap the + * memory partition */ + wl12xx_debug(DEBUG_SPI, "End of partition[1] is" + " overlapping partition[0]. Adjusted."); + reg_size = mem_start - reg_start; + wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X", + mem_start, mem_size); + wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X", + reg_start, reg_size); + } + + partition[0].start = mem_start; + partition[0].size = mem_size; + partition[1].start = reg_start; + partition[1].size = reg_size; + + wl->physical_mem_addr = mem_start; + wl->physical_reg_addr = reg_start; + + wl->virtual_mem_addr = 0; + wl->virtual_reg_addr = mem_size; + + t.tx_buf = tx_buf; + t.len = sizeof(tx_buf); + spi_message_add_tail(&t, &m); + + spi_sync(wl->spi, &m); +} + +void wl12xx_spi_read(struct wl12xx *wl, int addr, void *buf, + size_t len) +{ + struct spi_transfer t[3]; + struct spi_message m; + char busy_buf[TNETWIF_READ_OFFSET_BYTES]; + u32 cmd; + + cmd = 0; + cmd |= WSPI_CMD_READ; + cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH; + cmd |= addr & WSPI_CMD_BYTE_ADDR; + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = &cmd; + t[0].len = 4; + spi_message_add_tail(&t[0], &m); + + /* Busy and non busy words read */ + t[1].rx_buf = busy_buf; + t[1].len = TNETWIF_READ_OFFSET_BYTES; + spi_message_add_tail(&t[1], &m); + + t[2].rx_buf = buf; + t[2].len = len; + spi_message_add_tail(&t[2], &m); + + spi_sync(wl->spi, &m); + + /* FIXME: check busy words */ + + wl12xx_dump(DEBUG_SPI, "spi_read cmd -> ", &cmd, sizeof(cmd)); + wl12xx_dump(DEBUG_SPI, "spi_read buf <- ", buf, len); +} + +void wl12xx_spi_write(struct wl12xx *wl, int addr, void *buf, + size_t len) +{ + struct spi_transfer t[2]; + struct spi_message m; + u32 cmd; + + cmd = 0; + cmd |= WSPI_CMD_WRITE; + cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH; + cmd |= addr & WSPI_CMD_BYTE_ADDR; + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = &cmd; + t[0].len = sizeof(cmd); + spi_message_add_tail(&t[0], &m); + + t[1].tx_buf = buf; + t[1].len = len; + spi_message_add_tail(&t[1], &m); + + spi_sync(wl->spi, &m); + + wl12xx_dump(DEBUG_SPI, "spi_write cmd -> ", &cmd, sizeof(cmd)); + wl12xx_dump(DEBUG_SPI, "spi_write buf -> ", buf, len); +} + +void wl12xx_spi_mem_read(struct wl12xx *wl, int addr, void *buf, + size_t len) +{ + int physical; + + physical = wl12xx_translate_mem_addr(wl, addr); + + wl12xx_spi_read(wl, physical, buf, len); +} + +void wl12xx_spi_mem_write(struct wl12xx *wl, int addr, void *buf, + size_t len) +{ + int physical; + + physical = wl12xx_translate_mem_addr(wl, addr); + + wl12xx_spi_write(wl, physical, buf, len); +} + +u32 wl12xx_mem_read32(struct wl12xx *wl, int addr) +{ + return wl12xx_read32(wl, wl12xx_translate_mem_addr(wl, addr)); +} + +void wl12xx_mem_write32(struct wl12xx *wl, int addr, u32 val) +{ + wl12xx_write32(wl, wl12xx_translate_mem_addr(wl, addr), val); +} + +u32 wl12xx_reg_read32(struct wl12xx *wl, int addr) +{ + return wl12xx_read32(wl, wl12xx_translate_reg_addr(wl, addr)); +} + +void wl12xx_reg_write32(struct wl12xx *wl, int addr, u32 val) +{ + wl12xx_write32(wl, wl12xx_translate_reg_addr(wl, addr), val); +} diff --git a/drivers/net/wireless/wl12xx/spi.h b/drivers/net/wireless/wl12xx/spi.h new file mode 100644 index 00000000000..fd3227e904a --- /dev/null +++ b/drivers/net/wireless/wl12xx/spi.h @@ -0,0 +1,109 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_SPI_H__ +#define __WL12XX_SPI_H__ + +#include "cmd.h" +#include "acx.h" +#include "reg.h" + +#define HW_ACCESS_MEMORY_MAX_RANGE 0x1FFC0 + +#define HW_ACCESS_PART0_SIZE_ADDR 0x1FFC0 +#define HW_ACCESS_PART0_START_ADDR 0x1FFC4 +#define HW_ACCESS_PART1_SIZE_ADDR 0x1FFC8 +#define HW_ACCESS_PART1_START_ADDR 0x1FFCC + +#define HW_ACCESS_REGISTER_SIZE 4 + +#define HW_ACCESS_PRAM_MAX_RANGE 0x3c000 + +#define WSPI_CMD_READ 0x40000000 +#define WSPI_CMD_WRITE 0x00000000 +#define WSPI_CMD_FIXED 0x20000000 +#define WSPI_CMD_BYTE_LENGTH 0x1FFE0000 +#define WSPI_CMD_BYTE_LENGTH_OFFSET 17 +#define WSPI_CMD_BYTE_ADDR 0x0001FFFF + +#define WSPI_INIT_CMD_CRC_LEN 5 + +#define WSPI_INIT_CMD_START 0x00 +#define WSPI_INIT_CMD_TX 0x40 +/* the extra bypass bit is sampled by the TNET as '1' */ +#define WSPI_INIT_CMD_BYPASS_BIT 0x80 +#define WSPI_INIT_CMD_FIXEDBUSY_LEN 0x07 +#define WSPI_INIT_CMD_EN_FIXEDBUSY 0x80 +#define WSPI_INIT_CMD_DIS_FIXEDBUSY 0x00 +#define WSPI_INIT_CMD_IOD 0x40 +#define WSPI_INIT_CMD_IP 0x20 +#define WSPI_INIT_CMD_CS 0x10 +#define WSPI_INIT_CMD_WS 0x08 +#define WSPI_INIT_CMD_WSPI 0x01 +#define WSPI_INIT_CMD_END 0x01 + +#define WSPI_INIT_CMD_LEN 8 + +#define TNETWIF_READ_OFFSET_BYTES 8 +#define HW_ACCESS_WSPI_FIXED_BUSY_LEN \ + ((TNETWIF_READ_OFFSET_BYTES - 4) / sizeof(u32)) +#define HW_ACCESS_WSPI_INIT_CMD_MASK 0 + + +/* Raw target IO, address is not translated */ +void wl12xx_spi_read(struct wl12xx *wl, int addr, void *buf, size_t len); +void wl12xx_spi_write(struct wl12xx *wl, int addr, void *buf, size_t len); + +/* Memory target IO, address is tranlated to partition 0 */ +void wl12xx_spi_mem_read(struct wl12xx *wl, int addr, void *buf, size_t len); +void wl12xx_spi_mem_write(struct wl12xx *wl, int addr, void *buf, size_t len); +u32 wl12xx_mem_read32(struct wl12xx *wl, int addr); +void wl12xx_mem_write32(struct wl12xx *wl, int addr, u32 val); + +/* Registers IO */ +u32 wl12xx_reg_read32(struct wl12xx *wl, int addr); +void wl12xx_reg_write32(struct wl12xx *wl, int addr, u32 val); + +/* INIT and RESET words */ +void wl12xx_spi_reset(struct wl12xx *wl); +void wl12xx_spi_init(struct wl12xx *wl); +void wl12xx_set_partition(struct wl12xx *wl, + u32 part_start, u32 part_size, + u32 reg_start, u32 reg_size); + +static inline u32 wl12xx_read32(struct wl12xx *wl, int addr) +{ + u32 response; + + wl12xx_spi_read(wl, addr, &response, sizeof(u32)); + + return response; +} + +static inline void wl12xx_write32(struct wl12xx *wl, int addr, u32 val) +{ + wl12xx_spi_write(wl, addr, &val, sizeof(u32)); +} + +#endif /* __WL12XX_SPI_H__ */ diff --git a/drivers/net/wireless/wl12xx/tx.c b/drivers/net/wireless/wl12xx/tx.c new file mode 100644 index 00000000000..62145e205a8 --- /dev/null +++ b/drivers/net/wireless/wl12xx/tx.c @@ -0,0 +1,557 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include + +#include "wl12xx.h" +#include "reg.h" +#include "spi.h" +#include "tx.h" +#include "ps.h" + +static bool wl12xx_tx_double_buffer_busy(struct wl12xx *wl, u32 data_out_count) +{ + int used, data_in_count; + + data_in_count = wl->data_in_count; + + if (data_in_count < data_out_count) + /* data_in_count has wrapped */ + data_in_count += TX_STATUS_DATA_OUT_COUNT_MASK + 1; + + used = data_in_count - data_out_count; + + WARN_ON(used < 0); + WARN_ON(used > DP_TX_PACKET_RING_CHUNK_NUM); + + if (used >= DP_TX_PACKET_RING_CHUNK_NUM) + return true; + else + return false; +} + +static int wl12xx_tx_path_status(struct wl12xx *wl) +{ + u32 status, addr, data_out_count; + bool busy; + + addr = wl->data_path->tx_control_addr; + status = wl12xx_mem_read32(wl, addr); + data_out_count = status & TX_STATUS_DATA_OUT_COUNT_MASK; + busy = wl12xx_tx_double_buffer_busy(wl, data_out_count); + + if (busy) + return -EBUSY; + + return 0; +} + +static int wl12xx_tx_id(struct wl12xx *wl, struct sk_buff *skb) +{ + int i; + + for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++) + if (wl->tx_frames[i] == NULL) { + wl->tx_frames[i] = skb; + return i; + } + + return -EBUSY; +} + +static void wl12xx_tx_control(struct tx_double_buffer_desc *tx_hdr, + struct ieee80211_tx_info *control, u16 fc) +{ + *(u16 *)&tx_hdr->control = 0; + + tx_hdr->control.rate_policy = 0; + + /* 802.11 packets */ + tx_hdr->control.packet_type = 0; + + if (control->flags & IEEE80211_TX_CTL_NO_ACK) + tx_hdr->control.ack_policy = 1; + + tx_hdr->control.tx_complete = 1; + + if ((fc & IEEE80211_FTYPE_DATA) && + ((fc & IEEE80211_STYPE_QOS_DATA) || + (fc & IEEE80211_STYPE_QOS_NULLFUNC))) + tx_hdr->control.qos = 1; +} + +/* RSN + MIC = 8 + 8 = 16 bytes (worst case - AES). */ +#define MAX_MSDU_SECURITY_LENGTH 16 +#define MAX_MPDU_SECURITY_LENGTH 16 +#define WLAN_QOS_HDR_LEN 26 +#define MAX_MPDU_HEADER_AND_SECURITY (MAX_MPDU_SECURITY_LENGTH + \ + WLAN_QOS_HDR_LEN) +#define HW_BLOCK_SIZE 252 +static void wl12xx_tx_frag_block_num(struct tx_double_buffer_desc *tx_hdr) +{ + u16 payload_len, frag_threshold, mem_blocks; + u16 num_mpdus, mem_blocks_per_frag; + + frag_threshold = IEEE80211_MAX_FRAG_THRESHOLD; + tx_hdr->frag_threshold = cpu_to_le16(frag_threshold); + + payload_len = tx_hdr->length + MAX_MSDU_SECURITY_LENGTH; + + if (payload_len > frag_threshold) { + mem_blocks_per_frag = + ((frag_threshold + MAX_MPDU_HEADER_AND_SECURITY) / + HW_BLOCK_SIZE) + 1; + num_mpdus = payload_len / frag_threshold; + mem_blocks = num_mpdus * mem_blocks_per_frag; + payload_len -= num_mpdus * frag_threshold; + num_mpdus++; + + } else { + mem_blocks_per_frag = 0; + mem_blocks = 0; + num_mpdus = 1; + } + + mem_blocks += (payload_len / HW_BLOCK_SIZE) + 1; + + if (num_mpdus > 1) + mem_blocks += min(num_mpdus, mem_blocks_per_frag); + + tx_hdr->num_mem_blocks = mem_blocks; +} + +static int wl12xx_tx_fill_hdr(struct wl12xx *wl, struct sk_buff *skb, + struct ieee80211_tx_info *control) +{ + struct tx_double_buffer_desc *tx_hdr; + struct ieee80211_rate *rate; + int id; + u16 fc; + + if (!skb) + return -EINVAL; + + id = wl12xx_tx_id(wl, skb); + if (id < 0) + return id; + + fc = *(u16 *)skb->data; + tx_hdr = (struct tx_double_buffer_desc *) skb_push(skb, + sizeof(*tx_hdr)); + + tx_hdr->length = cpu_to_le16(skb->len - sizeof(*tx_hdr)); + rate = ieee80211_get_tx_rate(wl->hw, control); + tx_hdr->rate = cpu_to_le16(rate->hw_value); + tx_hdr->expiry_time = cpu_to_le32(1 << 16); + tx_hdr->id = id; + + /* FIXME: how to get the correct queue id? */ + tx_hdr->xmit_queue = 0; + + wl12xx_tx_control(tx_hdr, control, fc); + wl12xx_tx_frag_block_num(tx_hdr); + + return 0; +} + +/* We copy the packet to the target */ +static int wl12xx_tx_send_packet(struct wl12xx *wl, struct sk_buff *skb, + struct ieee80211_tx_info *control) +{ + struct tx_double_buffer_desc *tx_hdr; + int len; + u32 addr; + + if (!skb) + return -EINVAL; + + tx_hdr = (struct tx_double_buffer_desc *) skb->data; + + if (control->control.hw_key && + control->control.hw_key->alg == ALG_TKIP) { + int hdrlen; + u16 fc; + u8 *pos; + + fc = *(u16 *)(skb->data + sizeof(*tx_hdr)); + tx_hdr->length += WL12XX_TKIP_IV_SPACE; + + hdrlen = ieee80211_hdrlen(fc); + + pos = skb_push(skb, WL12XX_TKIP_IV_SPACE); + memmove(pos, pos + WL12XX_TKIP_IV_SPACE, + sizeof(*tx_hdr) + hdrlen); + } + + /* Revisit. This is a workaround for getting non-aligned packets. + This happens at least with EAPOL packets from the user space. + Our DMA requires packets to be aligned on a 4-byte boundary. + */ + if (unlikely((long)skb->data & 0x03)) { + int offset = (4 - (long)skb->data) & 0x03; + wl12xx_debug(DEBUG_TX, "skb offset %d", offset); + + /* check whether the current skb can be used */ + if (!skb_cloned(skb) && (skb_tailroom(skb) >= offset)) { + unsigned char *src = skb->data; + + /* align the buffer on a 4-byte boundary */ + skb_reserve(skb, offset); + memmove(skb->data, src, skb->len); + } else { + wl12xx_info("No handler, fixme!"); + return -EINVAL; + } + } + + /* Our skb->data at this point includes the HW header */ + len = WL12XX_TX_ALIGN(skb->len); + + if (wl->data_in_count & 0x1) + addr = wl->data_path->tx_packet_ring_addr + + wl->data_path->tx_packet_ring_chunk_size; + else + addr = wl->data_path->tx_packet_ring_addr; + + wl12xx_spi_mem_write(wl, addr, skb->data, len); + + wl12xx_debug(DEBUG_TX, "tx id %u skb 0x%p payload %u rate 0x%x", + tx_hdr->id, skb, tx_hdr->length, tx_hdr->rate); + + return 0; +} + +static void wl12xx_tx_trigger(struct wl12xx *wl) +{ + u32 data, addr; + + if (wl->data_in_count & 0x1) { + addr = ACX_REG_INTERRUPT_TRIG_H; + data = INTR_TRIG_TX_PROC1; + } else { + addr = ACX_REG_INTERRUPT_TRIG; + data = INTR_TRIG_TX_PROC0; + } + + wl12xx_reg_write32(wl, addr, data); + + /* Bumping data in */ + wl->data_in_count = (wl->data_in_count + 1) & + TX_STATUS_DATA_OUT_COUNT_MASK; +} + +/* caller must hold wl->mutex */ +static int wl12xx_tx_frame(struct wl12xx *wl, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info; + int ret = 0; + u8 idx; + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) { + idx = info->control.hw_key->hw_key_idx; + if (unlikely(wl->default_key != idx)) { + ret = wl12xx_acx_default_key(wl, idx); + if (ret < 0) + return ret; + } + } + + ret = wl12xx_tx_path_status(wl); + if (ret < 0) + return ret; + + ret = wl12xx_tx_fill_hdr(wl, skb, info); + if (ret < 0) + return ret; + + ret = wl12xx_tx_send_packet(wl, skb, info); + if (ret < 0) + return ret; + + wl12xx_tx_trigger(wl); + + return ret; +} + +void wl12xx_tx_work(struct work_struct *work) +{ + struct wl12xx *wl = container_of(work, struct wl12xx, tx_work); + struct sk_buff *skb; + bool woken_up = false; + int ret; + + mutex_lock(&wl->mutex); + + if (unlikely(wl->state == WL12XX_STATE_OFF)) + goto out; + + while ((skb = skb_dequeue(&wl->tx_queue))) { + if (!woken_up) { + wl12xx_ps_elp_wakeup(wl); + woken_up = true; + } + + ret = wl12xx_tx_frame(wl, skb); + if (ret == -EBUSY) { + /* firmware buffer is full, stop queues */ + wl12xx_debug(DEBUG_TX, "tx_work: fw buffer full, " + "stop queues"); + ieee80211_stop_queues(wl->hw); + wl->tx_queue_stopped = true; + skb_queue_head(&wl->tx_queue, skb); + goto out; + } else if (ret < 0) { + dev_kfree_skb(skb); + goto out; + } + } + +out: + if (woken_up) + wl12xx_ps_elp_sleep(wl); + + mutex_unlock(&wl->mutex); +} + +static const char *wl12xx_tx_parse_status(u8 status) +{ + /* 8 bit status field, one character per bit plus null */ + static char buf[9]; + int i = 0; + + memset(buf, 0, sizeof(buf)); + + if (status & TX_DMA_ERROR) + buf[i++] = 'm'; + if (status & TX_DISABLED) + buf[i++] = 'd'; + if (status & TX_RETRY_EXCEEDED) + buf[i++] = 'r'; + if (status & TX_TIMEOUT) + buf[i++] = 't'; + if (status & TX_KEY_NOT_FOUND) + buf[i++] = 'k'; + if (status & TX_ENCRYPT_FAIL) + buf[i++] = 'e'; + if (status & TX_UNAVAILABLE_PRIORITY) + buf[i++] = 'p'; + + /* bit 0 is unused apparently */ + + return buf; +} + +static void wl12xx_tx_packet_cb(struct wl12xx *wl, + struct tx_result *result) +{ + struct ieee80211_tx_info *info; + struct sk_buff *skb; + int hdrlen, ret; + u8 *frame; + + skb = wl->tx_frames[result->id]; + if (skb == NULL) { + wl12xx_error("SKB for packet %d is NULL", result->id); + return; + } + + info = IEEE80211_SKB_CB(skb); + + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && + (result->status == TX_SUCCESS)) + info->flags |= IEEE80211_TX_STAT_ACK; + + info->status.rates[0].count = result->ack_failures + 1; + wl->stats.retry_count += result->ack_failures; + + /* + * We have to remove our private TX header before pushing + * the skb back to mac80211. + */ + frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc)); + if (info->control.hw_key && + info->control.hw_key->alg == ALG_TKIP) { + hdrlen = ieee80211_get_hdrlen_from_skb(skb); + memmove(frame + WL12XX_TKIP_IV_SPACE, frame, hdrlen); + skb_pull(skb, WL12XX_TKIP_IV_SPACE); + } + + wl12xx_debug(DEBUG_TX, "tx status id %u skb 0x%p failures %u rate 0x%x" + " status 0x%x (%s)", + result->id, skb, result->ack_failures, result->rate, + result->status, wl12xx_tx_parse_status(result->status)); + + + ieee80211_tx_status(wl->hw, skb); + + wl->tx_frames[result->id] = NULL; + + if (wl->tx_queue_stopped) { + wl12xx_debug(DEBUG_TX, "cb: queue was stopped"); + + skb = skb_dequeue(&wl->tx_queue); + + /* The skb can be NULL because tx_work might have been + scheduled before the queue was stopped making the + queue empty */ + + if (skb) { + ret = wl12xx_tx_frame(wl, skb); + if (ret == -EBUSY) { + /* firmware buffer is still full */ + wl12xx_debug(DEBUG_TX, "cb: fw buffer " + "still full"); + skb_queue_head(&wl->tx_queue, skb); + return; + } else if (ret < 0) { + dev_kfree_skb(skb); + return; + } + } + + wl12xx_debug(DEBUG_TX, "cb: waking queues"); + ieee80211_wake_queues(wl->hw); + wl->tx_queue_stopped = false; + } +} + +/* Called upon reception of a TX complete interrupt */ +void wl12xx_tx_complete(struct wl12xx *wl) +{ + int i, result_index, num_complete = 0; + struct tx_result result[FW_TX_CMPLT_BLOCK_SIZE], *result_ptr; + + if (unlikely(wl->state != WL12XX_STATE_ON)) + return; + + /* First we read the result */ + wl12xx_spi_mem_read(wl, wl->data_path->tx_complete_addr, + result, sizeof(result)); + + result_index = wl->next_tx_complete; + + for (i = 0; i < ARRAY_SIZE(result); i++) { + result_ptr = &result[result_index]; + + if (result_ptr->done_1 == 1 && + result_ptr->done_2 == 1) { + wl12xx_tx_packet_cb(wl, result_ptr); + + result_ptr->done_1 = 0; + result_ptr->done_2 = 0; + + result_index = (result_index + 1) & + (FW_TX_CMPLT_BLOCK_SIZE - 1); + num_complete++; + } else { + break; + } + } + + /* Every completed frame needs to be acknowledged */ + if (num_complete) { + /* + * If we've wrapped, we have to clear + * the results in 2 steps. + */ + if (result_index > wl->next_tx_complete) { + /* Only 1 write is needed */ + wl12xx_spi_mem_write(wl, + wl->data_path->tx_complete_addr + + (wl->next_tx_complete * + sizeof(struct tx_result)), + &result[wl->next_tx_complete], + num_complete * + sizeof(struct tx_result)); + + + } else if (result_index < wl->next_tx_complete) { + /* 2 writes are needed */ + wl12xx_spi_mem_write(wl, + wl->data_path->tx_complete_addr + + (wl->next_tx_complete * + sizeof(struct tx_result)), + &result[wl->next_tx_complete], + (FW_TX_CMPLT_BLOCK_SIZE - + wl->next_tx_complete) * + sizeof(struct tx_result)); + + wl12xx_spi_mem_write(wl, + wl->data_path->tx_complete_addr, + result, + (num_complete - + FW_TX_CMPLT_BLOCK_SIZE + + wl->next_tx_complete) * + sizeof(struct tx_result)); + + } else { + /* We have to write the whole array */ + wl12xx_spi_mem_write(wl, + wl->data_path->tx_complete_addr, + result, + FW_TX_CMPLT_BLOCK_SIZE * + sizeof(struct tx_result)); + } + + } + + wl->next_tx_complete = result_index; +} + +/* caller must hold wl->mutex */ +void wl12xx_tx_flush(struct wl12xx *wl) +{ + int i; + struct sk_buff *skb; + struct ieee80211_tx_info *info; + + /* TX failure */ +/* control->flags = 0; FIXME */ + + while ((skb = skb_dequeue(&wl->tx_queue))) { + info = IEEE80211_SKB_CB(skb); + + wl12xx_debug(DEBUG_TX, "flushing skb 0x%p", skb); + + if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) + continue; + + ieee80211_tx_status(wl->hw, skb); + } + + for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++) + if (wl->tx_frames[i] != NULL) { + skb = wl->tx_frames[i]; + info = IEEE80211_SKB_CB(skb); + + if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) + continue; + + ieee80211_tx_status(wl->hw, skb); + wl->tx_frames[i] = NULL; + } +} diff --git a/drivers/net/wireless/wl12xx/tx.h b/drivers/net/wireless/wl12xx/tx.h new file mode 100644 index 00000000000..dc82691f4c1 --- /dev/null +++ b/drivers/net/wireless/wl12xx/tx.h @@ -0,0 +1,215 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_TX_H__ +#define __WL12XX_TX_H__ + +#include + +/* + * + * TX PATH + * + * The Tx path uses a double buffer and a tx_control structure, each located + * at a fixed address in the device's memory. On startup, the host retrieves + * the pointers to these addresses. A double buffer allows for continuous data + * flow towards the device. The host keeps track of which buffer is available + * and alternates between these two buffers on a per packet basis. + * + * The size of each of the two buffers is large enough to hold the longest + * 802.3 packet - maximum size Ethernet packet + header + descriptor. + * TX complete indication will be received a-synchronously in a TX done cyclic + * buffer which is composed of 16 tx_result descriptors structures and is used + * in a cyclic manner. + * + * The TX (HOST) procedure is as follows: + * 1. Read the Tx path status, that will give the data_out_count. + * 2. goto 1, if not possible. + * i.e. if data_in_count - data_out_count >= HwBuffer size (2 for double + * buffer). + * 3. Copy the packet (preceded by double_buffer_desc), if possible. + * i.e. if data_in_count - data_out_count < HwBuffer size (2 for double + * buffer). + * 4. increment data_in_count. + * 5. Inform the firmware by generating a firmware internal interrupt. + * 6. FW will increment data_out_count after it reads the buffer. + * + * The TX Complete procedure: + * 1. To get a TX complete indication the host enables the tx_complete flag in + * the TX descriptor Structure. + * 2. For each packet with a Tx Complete field set, the firmware adds the + * transmit results to the cyclic buffer (txDoneRing) and sets both done_1 + * and done_2 to 1 to indicate driver ownership. + * 3. The firmware sends a Tx Complete interrupt to the host to trigger the + * host to process the new data. Note: interrupt will be send per packet if + * TX complete indication was requested in tx_control or per crossing + * aggregation threshold. + * 4. After receiving the Tx Complete interrupt, the host reads the + * TxDescriptorDone information in a cyclic manner and clears both done_1 + * and done_2 fields. + * + */ + +#define TX_COMPLETE_REQUIRED_BIT 0x80 +#define TX_STATUS_DATA_OUT_COUNT_MASK 0xf +#define WL12XX_TX_ALIGN_TO 4 +#define WL12XX_TX_ALIGN(len) (((len) + WL12XX_TX_ALIGN_TO - 1) & \ + ~(WL12XX_TX_ALIGN_TO - 1)) +#define WL12XX_TKIP_IV_SPACE 4 + +struct tx_control { + /* Rate Policy (class) index */ + unsigned rate_policy:3; + + /* When set, no ack policy is expected */ + unsigned ack_policy:1; + + /* + * Packet type: + * 0 -> 802.11 + * 1 -> 802.3 + * 2 -> IP + * 3 -> raw codec + */ + unsigned packet_type:2; + + /* If set, this is a QoS-Null or QoS-Data frame */ + unsigned qos:1; + + /* + * If set, the target triggers the tx complete INT + * upon frame sending completion. + */ + unsigned tx_complete:1; + + /* 2 bytes padding before packet header */ + unsigned xfer_pad:1; + + unsigned reserved:7; +} __attribute__ ((packed)); + + +struct tx_double_buffer_desc { + /* Length of payload, including headers. */ + u16 length; + + /* + * A bit mask that specifies the initial rate to be used + * Possible values are: + * 0x0001 - 1Mbits + * 0x0002 - 2Mbits + * 0x0004 - 5.5Mbits + * 0x0008 - 6Mbits + * 0x0010 - 9Mbits + * 0x0020 - 11Mbits + * 0x0040 - 12Mbits + * 0x0080 - 18Mbits + * 0x0100 - 22Mbits + * 0x0200 - 24Mbits + * 0x0400 - 36Mbits + * 0x0800 - 48Mbits + * 0x1000 - 54Mbits + */ + u16 rate; + + /* Time in us that a packet can spend in the target */ + u32 expiry_time; + + /* index of the TX queue used for this packet */ + u8 xmit_queue; + + /* Used to identify a packet */ + u8 id; + + struct tx_control control; + + /* + * The FW should cut the packet into fragments + * of this size. + */ + u16 frag_threshold; + + /* Numbers of HW queue blocks to be allocated */ + u8 num_mem_blocks; + + u8 reserved; +} __attribute__ ((packed)); + +enum { + TX_SUCCESS = 0, + TX_DMA_ERROR = BIT(7), + TX_DISABLED = BIT(6), + TX_RETRY_EXCEEDED = BIT(5), + TX_TIMEOUT = BIT(4), + TX_KEY_NOT_FOUND = BIT(3), + TX_ENCRYPT_FAIL = BIT(2), + TX_UNAVAILABLE_PRIORITY = BIT(1), +}; + +struct tx_result { + /* + * Ownership synchronization between the host and + * the firmware. If done_1 and done_2 are cleared, + * owned by the FW (no info ready). + */ + u8 done_1; + + /* same as double_buffer_desc->id */ + u8 id; + + /* + * Total air access duration consumed by this + * packet, including all retries and overheads. + */ + u16 medium_usage; + + /* Total media delay (from 1st EDCA AIFS counter until TX Complete). */ + u32 medium_delay; + + /* Time between host xfer and tx complete */ + u32 fw_hnadling_time; + + /* The LS-byte of the last TKIP sequence number. */ + u8 lsb_seq_num; + + /* Retry count */ + u8 ack_failures; + + /* At which rate we got a ACK */ + u16 rate; + + u16 reserved; + + /* TX_* */ + u8 status; + + /* See done_1 */ + u8 done_2; +} __attribute__ ((packed)); + +void wl12xx_tx_work(struct work_struct *work); +void wl12xx_tx_complete(struct wl12xx *wl); +void wl12xx_tx_flush(struct wl12xx *wl); + +#endif diff --git a/drivers/net/wireless/wl12xx/wl1251.c b/drivers/net/wireless/wl12xx/wl1251.c new file mode 100644 index 00000000000..bd0decdcad2 --- /dev/null +++ b/drivers/net/wireless/wl12xx/wl1251.c @@ -0,0 +1,709 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include + +#include "wl1251.h" +#include "reg.h" +#include "spi.h" +#include "boot.h" +#include "event.h" +#include "acx.h" +#include "tx.h" +#include "rx.h" +#include "ps.h" +#include "init.h" + +static struct wl12xx_partition_set wl1251_part_table[PART_TABLE_LEN] = { + [PART_DOWN] = { + .mem = { + .start = 0x00000000, + .size = 0x00016800 + }, + .reg = { + .start = REGISTERS_BASE, + .size = REGISTERS_DOWN_SIZE + }, + }, + + [PART_WORK] = { + .mem = { + .start = 0x00028000, + .size = 0x00014000 + }, + .reg = { + .start = REGISTERS_BASE, + .size = REGISTERS_WORK_SIZE + }, + }, + + /* WL1251 doesn't use the DRPW partition, so we don't set it here */ +}; + +static enum wl12xx_acx_int_reg wl1251_acx_reg_table[ACX_REG_TABLE_LEN] = { + [ACX_REG_INTERRUPT_TRIG] = (REGISTERS_BASE + 0x0474), + [ACX_REG_INTERRUPT_TRIG_H] = (REGISTERS_BASE + 0x0478), + [ACX_REG_INTERRUPT_MASK] = (REGISTERS_BASE + 0x0494), + [ACX_REG_HINT_MASK_SET] = (REGISTERS_BASE + 0x0498), + [ACX_REG_HINT_MASK_CLR] = (REGISTERS_BASE + 0x049C), + [ACX_REG_INTERRUPT_NO_CLEAR] = (REGISTERS_BASE + 0x04B0), + [ACX_REG_INTERRUPT_CLEAR] = (REGISTERS_BASE + 0x04A4), + [ACX_REG_INTERRUPT_ACK] = (REGISTERS_BASE + 0x04A8), + [ACX_REG_SLV_SOFT_RESET] = (REGISTERS_BASE + 0x0000), + [ACX_REG_EE_START] = (REGISTERS_BASE + 0x080C), + [ACX_REG_ECPU_CONTROL] = (REGISTERS_BASE + 0x0804) +}; + +static int wl1251_upload_firmware(struct wl12xx *wl) +{ + struct wl12xx_partition_set *p_table = wl->chip.p_table; + int addr, chunk_num, partition_limit; + size_t fw_data_len; + u8 *p; + + /* whal_FwCtrl_LoadFwImageSm() */ + + wl12xx_debug(DEBUG_BOOT, "chip id before fw upload: 0x%x", + wl12xx_reg_read32(wl, CHIP_ID_B)); + + /* 10.0 check firmware length and set partition */ + fw_data_len = (wl->fw[4] << 24) | (wl->fw[5] << 16) | + (wl->fw[6] << 8) | (wl->fw[7]); + + wl12xx_debug(DEBUG_BOOT, "fw_data_len %d chunk_size %d", fw_data_len, + CHUNK_SIZE); + + if ((fw_data_len % 4) != 0) { + wl12xx_error("firmware length not multiple of four"); + return -EIO; + } + + wl12xx_set_partition(wl, + p_table[PART_DOWN].mem.start, + p_table[PART_DOWN].mem.size, + p_table[PART_DOWN].reg.start, + p_table[PART_DOWN].reg.size); + + /* 10.1 set partition limit and chunk num */ + chunk_num = 0; + partition_limit = p_table[PART_DOWN].mem.size; + + while (chunk_num < fw_data_len / CHUNK_SIZE) { + /* 10.2 update partition, if needed */ + addr = p_table[PART_DOWN].mem.start + + (chunk_num + 2) * CHUNK_SIZE; + if (addr > partition_limit) { + addr = p_table[PART_DOWN].mem.start + + chunk_num * CHUNK_SIZE; + partition_limit = chunk_num * CHUNK_SIZE + + p_table[PART_DOWN].mem.size; + wl12xx_set_partition(wl, + addr, + p_table[PART_DOWN].mem.size, + p_table[PART_DOWN].reg.start, + p_table[PART_DOWN].reg.size); + } + + /* 10.3 upload the chunk */ + addr = p_table[PART_DOWN].mem.start + chunk_num * CHUNK_SIZE; + p = wl->fw + FW_HDR_SIZE + chunk_num * CHUNK_SIZE; + wl12xx_debug(DEBUG_BOOT, "uploading fw chunk 0x%p to 0x%x", + p, addr); + wl12xx_spi_mem_write(wl, addr, p, CHUNK_SIZE); + + chunk_num++; + } + + /* 10.4 upload the last chunk */ + addr = p_table[PART_DOWN].mem.start + chunk_num * CHUNK_SIZE; + p = wl->fw + FW_HDR_SIZE + chunk_num * CHUNK_SIZE; + wl12xx_debug(DEBUG_BOOT, "uploading fw last chunk (%d B) 0x%p to 0x%x", + fw_data_len % CHUNK_SIZE, p, addr); + wl12xx_spi_mem_write(wl, addr, p, fw_data_len % CHUNK_SIZE); + + return 0; +} + +static int wl1251_upload_nvs(struct wl12xx *wl) +{ + size_t nvs_len, nvs_bytes_written, burst_len; + int nvs_start, i; + u32 dest_addr, val; + u8 *nvs_ptr, *nvs; + + nvs = wl->nvs; + if (nvs == NULL) + return -ENODEV; + + nvs_ptr = nvs; + + nvs_len = wl->nvs_len; + nvs_start = wl->fw_len; + + /* + * Layout before the actual NVS tables: + * 1 byte : burst length. + * 2 bytes: destination address. + * n bytes: data to burst copy. + * + * This is ended by a 0 length, then the NVS tables. + */ + + while (nvs_ptr[0]) { + burst_len = nvs_ptr[0]; + dest_addr = (nvs_ptr[1] & 0xfe) | ((u32)(nvs_ptr[2] << 8)); + + /* We move our pointer to the data */ + nvs_ptr += 3; + + for (i = 0; i < burst_len; i++) { + val = (nvs_ptr[0] | (nvs_ptr[1] << 8) + | (nvs_ptr[2] << 16) | (nvs_ptr[3] << 24)); + + wl12xx_debug(DEBUG_BOOT, + "nvs burst write 0x%x: 0x%x", + dest_addr, val); + wl12xx_mem_write32(wl, dest_addr, val); + + nvs_ptr += 4; + dest_addr += 4; + } + } + + /* + * We've reached the first zero length, the first NVS table + * is 7 bytes further. + */ + nvs_ptr += 7; + nvs_len -= nvs_ptr - nvs; + nvs_len = ALIGN(nvs_len, 4); + + /* Now we must set the partition correctly */ + wl12xx_set_partition(wl, nvs_start, + wl->chip.p_table[PART_DOWN].mem.size, + wl->chip.p_table[PART_DOWN].reg.start, + wl->chip.p_table[PART_DOWN].reg.size); + + /* And finally we upload the NVS tables */ + nvs_bytes_written = 0; + while (nvs_bytes_written < nvs_len) { + val = (nvs_ptr[0] | (nvs_ptr[1] << 8) + | (nvs_ptr[2] << 16) | (nvs_ptr[3] << 24)); + + val = cpu_to_le32(val); + + wl12xx_debug(DEBUG_BOOT, + "nvs write table 0x%x: 0x%x", + nvs_start, val); + wl12xx_mem_write32(wl, nvs_start, val); + + nvs_ptr += 4; + nvs_bytes_written += 4; + nvs_start += 4; + } + + return 0; +} + +static int wl1251_boot(struct wl12xx *wl) +{ + int ret = 0, minor_minor_e2_ver; + u32 tmp, boot_data; + + ret = wl12xx_boot_soft_reset(wl); + if (ret < 0) + goto out; + + /* 2. start processing NVS file */ + ret = wl->chip.op_upload_nvs(wl); + if (ret < 0) + goto out; + + /* write firmware's last address (ie. it's length) to + * ACX_EEPROMLESS_IND_REG */ + wl12xx_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len); + + /* 6. read the EEPROM parameters */ + tmp = wl12xx_reg_read32(wl, SCR_PAD2); + + /* 7. read bootdata */ + wl->boot_attr.radio_type = (tmp & 0x0000FF00) >> 8; + wl->boot_attr.major = (tmp & 0x00FF0000) >> 16; + tmp = wl12xx_reg_read32(wl, SCR_PAD3); + + /* 8. check bootdata and call restart sequence */ + wl->boot_attr.minor = (tmp & 0x00FF0000) >> 16; + minor_minor_e2_ver = (tmp & 0xFF000000) >> 24; + + wl12xx_debug(DEBUG_BOOT, "radioType 0x%x majorE2Ver 0x%x " + "minorE2Ver 0x%x minor_minor_e2_ver 0x%x", + wl->boot_attr.radio_type, wl->boot_attr.major, + wl->boot_attr.minor, minor_minor_e2_ver); + + ret = wl12xx_boot_init_seq(wl); + if (ret < 0) + goto out; + + /* 9. NVS processing done */ + boot_data = wl12xx_reg_read32(wl, ACX_REG_ECPU_CONTROL); + + wl12xx_debug(DEBUG_BOOT, "halt boot_data 0x%x", boot_data); + + /* 10. check that ECPU_CONTROL_HALT bits are set in + * pWhalBus->uBootData and start uploading firmware + */ + if ((boot_data & ECPU_CONTROL_HALT) == 0) { + wl12xx_error("boot failed, ECPU_CONTROL_HALT not set"); + ret = -EIO; + goto out; + } + + ret = wl->chip.op_upload_fw(wl); + if (ret < 0) + goto out; + + /* 10.5 start firmware */ + ret = wl12xx_boot_run_firmware(wl); + if (ret < 0) + goto out; + + /* Get and save the firmware version */ + wl12xx_acx_fw_version(wl, wl->chip.fw_ver, sizeof(wl->chip.fw_ver)); + +out: + return ret; +} + +static int wl1251_mem_cfg(struct wl12xx *wl) +{ + struct wl1251_acx_config_memory mem_conf; + int ret, i; + + wl12xx_debug(DEBUG_ACX, "wl1251 mem cfg"); + + /* memory config */ + mem_conf.mem_config.num_stations = cpu_to_le16(DEFAULT_NUM_STATIONS); + mem_conf.mem_config.rx_mem_block_num = 35; + mem_conf.mem_config.tx_min_mem_block_num = 64; + mem_conf.mem_config.num_tx_queues = MAX_TX_QUEUES; + mem_conf.mem_config.host_if_options = HOSTIF_PKT_RING; + mem_conf.mem_config.num_ssid_profiles = 1; + mem_conf.mem_config.debug_buffer_size = + cpu_to_le16(TRACE_BUFFER_MAX_SIZE); + + /* RX queue config */ + mem_conf.rx_queue_config.dma_address = 0; + mem_conf.rx_queue_config.num_descs = ACX_RX_DESC_DEF; + mem_conf.rx_queue_config.priority = DEFAULT_RXQ_PRIORITY; + mem_conf.rx_queue_config.type = DEFAULT_RXQ_TYPE; + + /* TX queue config */ + for (i = 0; i < MAX_TX_QUEUES; i++) { + mem_conf.tx_queue_config[i].num_descs = ACX_TX_DESC_DEF; + mem_conf.tx_queue_config[i].attributes = i; + } + + mem_conf.header.id = ACX_MEM_CFG; + mem_conf.header.len = sizeof(struct wl1251_acx_config_memory) - + sizeof(struct acx_header); + mem_conf.header.len -= + (MAX_TX_QUEUE_CONFIGS - mem_conf.mem_config.num_tx_queues) * + sizeof(struct wl1251_acx_tx_queue_config); + + ret = wl12xx_cmd_configure(wl, &mem_conf, + sizeof(struct wl1251_acx_config_memory)); + if (ret < 0) + wl12xx_warning("wl1251 mem config failed: %d", ret); + + return ret; +} + +static int wl1251_hw_init_mem_config(struct wl12xx *wl) +{ + int ret; + + ret = wl1251_mem_cfg(wl); + if (ret < 0) + return ret; + + wl->target_mem_map = kzalloc(sizeof(struct wl1251_acx_mem_map), + GFP_KERNEL); + if (!wl->target_mem_map) { + wl12xx_error("couldn't allocate target memory map"); + return -ENOMEM; + } + + /* we now ask for the firmware built memory map */ + ret = wl12xx_acx_mem_map(wl, wl->target_mem_map, + sizeof(struct wl1251_acx_mem_map)); + if (ret < 0) { + wl12xx_error("couldn't retrieve firmware memory map"); + kfree(wl->target_mem_map); + wl->target_mem_map = NULL; + return ret; + } + + return 0; +} + +static void wl1251_set_ecpu_ctrl(struct wl12xx *wl, u32 flag) +{ + u32 cpu_ctrl; + + /* 10.5.0 run the firmware (I) */ + cpu_ctrl = wl12xx_reg_read32(wl, ACX_REG_ECPU_CONTROL); + + /* 10.5.1 run the firmware (II) */ + cpu_ctrl &= ~flag; + wl12xx_reg_write32(wl, ACX_REG_ECPU_CONTROL, cpu_ctrl); +} + +static void wl1251_target_enable_interrupts(struct wl12xx *wl) +{ + /* Enable target's interrupts */ + wl->intr_mask = WL1251_ACX_INTR_RX0_DATA | + WL1251_ACX_INTR_RX1_DATA | + WL1251_ACX_INTR_TX_RESULT | + WL1251_ACX_INTR_EVENT_A | + WL1251_ACX_INTR_EVENT_B | + WL1251_ACX_INTR_INIT_COMPLETE; + wl12xx_boot_target_enable_interrupts(wl); +} + +static void wl1251_irq_work(struct work_struct *work) +{ + u32 intr; + struct wl12xx *wl = + container_of(work, struct wl12xx, irq_work); + + mutex_lock(&wl->mutex); + + wl12xx_debug(DEBUG_IRQ, "IRQ work"); + + if (wl->state == WL12XX_STATE_OFF) + goto out; + + wl12xx_ps_elp_wakeup(wl); + + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, WL1251_ACX_INTR_ALL); + + intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_CLEAR); + wl12xx_debug(DEBUG_IRQ, "intr: 0x%x", intr); + + if (wl->data_path) { + wl12xx_spi_mem_read(wl, wl->data_path->rx_control_addr, + &wl->rx_counter, sizeof(u32)); + + /* We handle a frmware bug here */ + switch ((wl->rx_counter - wl->rx_handled) & 0xf) { + case 0: + wl12xx_debug(DEBUG_IRQ, "RX: FW and host in sync"); + intr &= ~WL1251_ACX_INTR_RX0_DATA; + intr &= ~WL1251_ACX_INTR_RX1_DATA; + break; + case 1: + wl12xx_debug(DEBUG_IRQ, "RX: FW +1"); + intr |= WL1251_ACX_INTR_RX0_DATA; + intr &= ~WL1251_ACX_INTR_RX1_DATA; + break; + case 2: + wl12xx_debug(DEBUG_IRQ, "RX: FW +2"); + intr |= WL1251_ACX_INTR_RX0_DATA; + intr |= WL1251_ACX_INTR_RX1_DATA; + break; + default: + wl12xx_warning("RX: FW and host out of sync: %d", + wl->rx_counter - wl->rx_handled); + break; + } + + wl->rx_handled = wl->rx_counter; + + + wl12xx_debug(DEBUG_IRQ, "RX counter: %d", wl->rx_counter); + } + + intr &= wl->intr_mask; + + if (intr == 0) { + wl12xx_debug(DEBUG_IRQ, "INTR is 0"); + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, + ~(wl->intr_mask)); + + goto out_sleep; + } + + if (intr & WL1251_ACX_INTR_RX0_DATA) { + wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_RX0_DATA"); + wl12xx_rx(wl); + } + + if (intr & WL1251_ACX_INTR_RX1_DATA) { + wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_RX1_DATA"); + wl12xx_rx(wl); + } + + if (intr & WL1251_ACX_INTR_TX_RESULT) { + wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_TX_RESULT"); + wl12xx_tx_complete(wl); + } + + if (intr & (WL1251_ACX_INTR_EVENT_A | WL1251_ACX_INTR_EVENT_B)) { + wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_EVENT (0x%x)", intr); + if (intr & WL1251_ACX_INTR_EVENT_A) + wl12xx_event_handle(wl, 0); + else + wl12xx_event_handle(wl, 1); + } + + if (intr & WL1251_ACX_INTR_INIT_COMPLETE) + wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_INIT_COMPLETE"); + + wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, ~(wl->intr_mask)); + +out_sleep: + wl12xx_ps_elp_sleep(wl); +out: + mutex_unlock(&wl->mutex); +} + +static int wl1251_hw_init_txq_fill(u8 qid, + struct acx_tx_queue_qos_config *config, + u32 num_blocks) +{ + config->qid = qid; + + switch (qid) { + case QOS_AC_BE: + config->high_threshold = + (QOS_TX_HIGH_BE_DEF * num_blocks) / 100; + config->low_threshold = + (QOS_TX_LOW_BE_DEF * num_blocks) / 100; + break; + case QOS_AC_BK: + config->high_threshold = + (QOS_TX_HIGH_BK_DEF * num_blocks) / 100; + config->low_threshold = + (QOS_TX_LOW_BK_DEF * num_blocks) / 100; + break; + case QOS_AC_VI: + config->high_threshold = + (QOS_TX_HIGH_VI_DEF * num_blocks) / 100; + config->low_threshold = + (QOS_TX_LOW_VI_DEF * num_blocks) / 100; + break; + case QOS_AC_VO: + config->high_threshold = + (QOS_TX_HIGH_VO_DEF * num_blocks) / 100; + config->low_threshold = + (QOS_TX_LOW_VO_DEF * num_blocks) / 100; + break; + default: + wl12xx_error("Invalid TX queue id: %d", qid); + return -EINVAL; + } + + return 0; +} + +static int wl1251_hw_init_tx_queue_config(struct wl12xx *wl) +{ + struct acx_tx_queue_qos_config config; + struct wl1251_acx_mem_map *wl_mem_map = wl->target_mem_map; + int ret, i; + + wl12xx_debug(DEBUG_ACX, "acx tx queue config"); + + config.header.id = ACX_TX_QUEUE_CFG; + config.header.len = sizeof(struct acx_tx_queue_qos_config) - + sizeof(struct acx_header); + + for (i = 0; i < MAX_NUM_OF_AC; i++) { + ret = wl1251_hw_init_txq_fill(i, &config, + wl_mem_map->num_tx_mem_blocks); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_configure(wl, &config, sizeof(config)); + if (ret < 0) + return ret; + } + + return 0; +} + +static int wl1251_hw_init_data_path_config(struct wl12xx *wl) +{ + int ret; + + /* asking for the data path parameters */ + wl->data_path = kzalloc(sizeof(struct acx_data_path_params_resp), + GFP_KERNEL); + if (!wl->data_path) { + wl12xx_error("Couldnt allocate data path parameters"); + return -ENOMEM; + } + + ret = wl12xx_acx_data_path_params(wl, wl->data_path); + if (ret < 0) { + kfree(wl->data_path); + wl->data_path = NULL; + return ret; + } + + return 0; +} + +static int wl1251_hw_init(struct wl12xx *wl) +{ + struct wl1251_acx_mem_map *wl_mem_map; + int ret; + + ret = wl12xx_hw_init_hwenc_config(wl); + if (ret < 0) + return ret; + + /* Template settings */ + ret = wl12xx_hw_init_templates_config(wl); + if (ret < 0) + return ret; + + /* Default memory configuration */ + ret = wl1251_hw_init_mem_config(wl); + if (ret < 0) + return ret; + + /* Default data path configuration */ + ret = wl1251_hw_init_data_path_config(wl); + if (ret < 0) + goto out_free_memmap; + + /* RX config */ + ret = wl12xx_hw_init_rx_config(wl, + RX_CFG_PROMISCUOUS | RX_CFG_TSF, + RX_FILTER_OPTION_DEF); + /* RX_CONFIG_OPTION_ANY_DST_ANY_BSS, + RX_FILTER_OPTION_FILTER_ALL); */ + if (ret < 0) + goto out_free_data_path; + + /* TX queues config */ + ret = wl1251_hw_init_tx_queue_config(wl); + if (ret < 0) + goto out_free_data_path; + + /* PHY layer config */ + ret = wl12xx_hw_init_phy_config(wl); + if (ret < 0) + goto out_free_data_path; + + /* Beacon filtering */ + ret = wl12xx_hw_init_beacon_filter(wl); + if (ret < 0) + goto out_free_data_path; + + /* Bluetooth WLAN coexistence */ + ret = wl12xx_hw_init_pta(wl); + if (ret < 0) + goto out_free_data_path; + + /* Energy detection */ + ret = wl12xx_hw_init_energy_detection(wl); + if (ret < 0) + goto out_free_data_path; + + /* Beacons and boradcast settings */ + ret = wl12xx_hw_init_beacon_broadcast(wl); + if (ret < 0) + goto out_free_data_path; + + /* Enable data path */ + ret = wl12xx_cmd_data_path(wl, wl->channel, 1); + if (ret < 0) + goto out_free_data_path; + + /* Default power state */ + ret = wl12xx_hw_init_power_auth(wl); + if (ret < 0) + goto out_free_data_path; + + wl_mem_map = wl->target_mem_map; + wl12xx_info("%d tx blocks at 0x%x, %d rx blocks at 0x%x", + wl_mem_map->num_tx_mem_blocks, + wl->data_path->tx_control_addr, + wl_mem_map->num_rx_mem_blocks, + wl->data_path->rx_control_addr); + + return 0; + + out_free_data_path: + kfree(wl->data_path); + + out_free_memmap: + kfree(wl->target_mem_map); + + return ret; +} + +static int wl1251_plt_init(struct wl12xx *wl) +{ + int ret; + + ret = wl1251_hw_init_mem_config(wl); + if (ret < 0) + return ret; + + ret = wl12xx_cmd_data_path(wl, wl->channel, 1); + if (ret < 0) + return ret; + + return 0; +} + +void wl1251_setup(struct wl12xx *wl) +{ + /* FIXME: Is it better to use strncpy here or is this ok? */ + wl->chip.fw_filename = WL1251_FW_NAME; + wl->chip.nvs_filename = WL1251_NVS_NAME; + + /* Now we know what chip we're using, so adjust the power on sleep + * time accordingly */ + wl->chip.power_on_sleep = WL1251_POWER_ON_SLEEP; + + wl->chip.intr_cmd_complete = WL1251_ACX_INTR_CMD_COMPLETE; + wl->chip.intr_init_complete = WL1251_ACX_INTR_INIT_COMPLETE; + + wl->chip.op_upload_nvs = wl1251_upload_nvs; + wl->chip.op_upload_fw = wl1251_upload_firmware; + wl->chip.op_boot = wl1251_boot; + wl->chip.op_set_ecpu_ctrl = wl1251_set_ecpu_ctrl; + wl->chip.op_target_enable_interrupts = wl1251_target_enable_interrupts; + wl->chip.op_hw_init = wl1251_hw_init; + wl->chip.op_plt_init = wl1251_plt_init; + + wl->chip.p_table = wl1251_part_table; + wl->chip.acx_reg_table = wl1251_acx_reg_table; + + INIT_WORK(&wl->irq_work, wl1251_irq_work); +} diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h new file mode 100644 index 00000000000..1f4a4433039 --- /dev/null +++ b/drivers/net/wireless/wl12xx/wl1251.h @@ -0,0 +1,165 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2008 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL1251_H__ +#define __WL1251_H__ + +#include + +#include "wl12xx.h" +#include "acx.h" + +#define WL1251_FW_NAME "wl1251-fw.bin" +#define WL1251_NVS_NAME "wl1251-nvs.bin" + +#define WL1251_POWER_ON_SLEEP 10 /* in miliseconds */ + +void wl1251_setup(struct wl12xx *wl); + + +struct wl1251_acx_memory { + __le16 num_stations; /* number of STAs to be supported. */ + u16 reserved_1; + + /* + * Nmber of memory buffers for the RX mem pool. + * The actual number may be less if there are + * not enough blocks left for the minimum num + * of TX ones. + */ + u8 rx_mem_block_num; + u8 reserved_2; + u8 num_tx_queues; /* From 1 to 16 */ + u8 host_if_options; /* HOST_IF* */ + u8 tx_min_mem_block_num; + u8 num_ssid_profiles; + __le16 debug_buffer_size; +} __attribute__ ((packed)); + + +#define ACX_RX_DESC_MIN 1 +#define ACX_RX_DESC_MAX 127 +#define ACX_RX_DESC_DEF 32 +struct wl1251_acx_rx_queue_config { + u8 num_descs; + u8 pad; + u8 type; + u8 priority; + __le32 dma_address; +} __attribute__ ((packed)); + +#define ACX_TX_DESC_MIN 1 +#define ACX_TX_DESC_MAX 127 +#define ACX_TX_DESC_DEF 16 +struct wl1251_acx_tx_queue_config { + u8 num_descs; + u8 pad[2]; + u8 attributes; +} __attribute__ ((packed)); + +#define MAX_TX_QUEUE_CONFIGS 5 +#define MAX_TX_QUEUES 4 +struct wl1251_acx_config_memory { + struct acx_header header; + + struct wl1251_acx_memory mem_config; + struct wl1251_acx_rx_queue_config rx_queue_config; + struct wl1251_acx_tx_queue_config tx_queue_config[MAX_TX_QUEUE_CONFIGS]; +} __attribute__ ((packed)); + +struct wl1251_acx_mem_map { + struct acx_header header; + + void *code_start; + void *code_end; + + void *wep_defkey_start; + void *wep_defkey_end; + + void *sta_table_start; + void *sta_table_end; + + void *packet_template_start; + void *packet_template_end; + + void *queue_memory_start; + void *queue_memory_end; + + void *packet_memory_pool_start; + void *packet_memory_pool_end; + + void *debug_buffer1_start; + void *debug_buffer1_end; + + void *debug_buffer2_start; + void *debug_buffer2_end; + + /* Number of blocks FW allocated for TX packets */ + u32 num_tx_mem_blocks; + + /* Number of blocks FW allocated for RX packets */ + u32 num_rx_mem_blocks; +} __attribute__ ((packed)); + +/************************************************************************* + + Host Interrupt Register (WiLink -> Host) + +**************************************************************************/ + +/* RX packet is ready in Xfer buffer #0 */ +#define WL1251_ACX_INTR_RX0_DATA BIT(0) + +/* TX result(s) are in the TX complete buffer */ +#define WL1251_ACX_INTR_TX_RESULT BIT(1) + +/* OBSOLETE */ +#define WL1251_ACX_INTR_TX_XFR BIT(2) + +/* RX packet is ready in Xfer buffer #1 */ +#define WL1251_ACX_INTR_RX1_DATA BIT(3) + +/* Event was entered to Event MBOX #A */ +#define WL1251_ACX_INTR_EVENT_A BIT(4) + +/* Event was entered to Event MBOX #B */ +#define WL1251_ACX_INTR_EVENT_B BIT(5) + +/* OBSOLETE */ +#define WL1251_ACX_INTR_WAKE_ON_HOST BIT(6) + +/* Trace meassge on MBOX #A */ +#define WL1251_ACX_INTR_TRACE_A BIT(7) + +/* Trace meassge on MBOX #B */ +#define WL1251_ACX_INTR_TRACE_B BIT(8) + +/* Command processing completion */ +#define WL1251_ACX_INTR_CMD_COMPLETE BIT(9) + +/* Init sequence is done */ +#define WL1251_ACX_INTR_INIT_COMPLETE BIT(14) + +#define WL1251_ACX_INTR_ALL 0xFFFFFFFF + +#endif diff --git a/drivers/net/wireless/wl12xx/wl12xx.h b/drivers/net/wireless/wl12xx/wl12xx.h new file mode 100644 index 00000000000..48641437414 --- /dev/null +++ b/drivers/net/wireless/wl12xx/wl12xx.h @@ -0,0 +1,409 @@ +/* + * This file is part of wl12xx + * + * Copyright (c) 1998-2007 Texas Instruments Incorporated + * Copyright (C) 2008-2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __WL12XX_H__ +#define __WL12XX_H__ + +#include +#include +#include +#include + +#define DRIVER_NAME "wl12xx" +#define DRIVER_PREFIX DRIVER_NAME ": " + +enum { + DEBUG_NONE = 0, + DEBUG_IRQ = BIT(0), + DEBUG_SPI = BIT(1), + DEBUG_BOOT = BIT(2), + DEBUG_MAILBOX = BIT(3), + DEBUG_NETLINK = BIT(4), + DEBUG_EVENT = BIT(5), + DEBUG_TX = BIT(6), + DEBUG_RX = BIT(7), + DEBUG_SCAN = BIT(8), + DEBUG_CRYPT = BIT(9), + DEBUG_PSM = BIT(10), + DEBUG_MAC80211 = BIT(11), + DEBUG_CMD = BIT(12), + DEBUG_ACX = BIT(13), + DEBUG_ALL = ~0, +}; + +#define DEBUG_LEVEL (DEBUG_NONE) + +#define DEBUG_DUMP_LIMIT 1024 + +#define wl12xx_error(fmt, arg...) \ + printk(KERN_ERR DRIVER_PREFIX "ERROR " fmt "\n", ##arg) + +#define wl12xx_warning(fmt, arg...) \ + printk(KERN_WARNING DRIVER_PREFIX "WARNING " fmt "\n", ##arg) + +#define wl12xx_notice(fmt, arg...) \ + printk(KERN_INFO DRIVER_PREFIX fmt "\n", ##arg) + +#define wl12xx_info(fmt, arg...) \ + printk(KERN_DEBUG DRIVER_PREFIX fmt "\n", ##arg) + +#define wl12xx_debug(level, fmt, arg...) \ + do { \ + if (level & DEBUG_LEVEL) \ + printk(KERN_DEBUG DRIVER_PREFIX fmt "\n", ##arg); \ + } while (0) + +#define wl12xx_dump(level, prefix, buf, len) \ + do { \ + if (level & DEBUG_LEVEL) \ + print_hex_dump(KERN_DEBUG, DRIVER_PREFIX prefix, \ + DUMP_PREFIX_OFFSET, 16, 1, \ + buf, \ + min_t(size_t, len, DEBUG_DUMP_LIMIT), \ + 0); \ + } while (0) + +#define wl12xx_dump_ascii(level, prefix, buf, len) \ + do { \ + if (level & DEBUG_LEVEL) \ + print_hex_dump(KERN_DEBUG, DRIVER_PREFIX prefix, \ + DUMP_PREFIX_OFFSET, 16, 1, \ + buf, \ + min_t(size_t, len, DEBUG_DUMP_LIMIT), \ + true); \ + } while (0) + +#define WL12XX_DEFAULT_RX_CONFIG (CFG_UNI_FILTER_EN | \ + CFG_BSSID_FILTER_EN) + +#define WL12XX_DEFAULT_RX_FILTER (CFG_RX_PRSP_EN | \ + CFG_RX_MGMT_EN | \ + CFG_RX_DATA_EN | \ + CFG_RX_CTL_EN | \ + CFG_RX_BCN_EN | \ + CFG_RX_AUTH_EN | \ + CFG_RX_ASSOC_EN) + + +struct boot_attr { + u32 radio_type; + u8 mac_clock; + u8 arm_clock; + int firmware_debug; + u32 minor; + u32 major; + u32 bugfix; +}; + +enum wl12xx_state { + WL12XX_STATE_OFF, + WL12XX_STATE_ON, + WL12XX_STATE_PLT, +}; + +enum wl12xx_partition_type { + PART_DOWN, + PART_WORK, + PART_DRPW, + + PART_TABLE_LEN +}; + +struct wl12xx_partition { + u32 size; + u32 start; +}; + +struct wl12xx_partition_set { + struct wl12xx_partition mem; + struct wl12xx_partition reg; +}; + +struct wl12xx; + +/* FIXME: I'm not sure about this structure name */ +struct wl12xx_chip { + u32 id; + + const char *fw_filename; + const char *nvs_filename; + + char fw_ver[21]; + + unsigned int power_on_sleep; + int intr_cmd_complete; + int intr_init_complete; + + int (*op_upload_fw)(struct wl12xx *wl); + int (*op_upload_nvs)(struct wl12xx *wl); + int (*op_boot)(struct wl12xx *wl); + void (*op_set_ecpu_ctrl)(struct wl12xx *wl, u32 flag); + void (*op_target_enable_interrupts)(struct wl12xx *wl); + int (*op_hw_init)(struct wl12xx *wl); + int (*op_plt_init)(struct wl12xx *wl); + + struct wl12xx_partition_set *p_table; + enum wl12xx_acx_int_reg *acx_reg_table; +}; + +struct wl12xx_stats { + struct acx_statistics *fw_stats; + unsigned long fw_stats_update; + + unsigned int retry_count; + unsigned int excessive_retries; +}; + +struct wl12xx_debugfs { + struct dentry *rootdir; + struct dentry *fw_statistics; + + struct dentry *tx_internal_desc_overflow; + + struct dentry *rx_out_of_mem; + struct dentry *rx_hdr_overflow; + struct dentry *rx_hw_stuck; + struct dentry *rx_dropped; + struct dentry *rx_fcs_err; + struct dentry *rx_xfr_hint_trig; + struct dentry *rx_path_reset; + struct dentry *rx_reset_counter; + + struct dentry *dma_rx_requested; + struct dentry *dma_rx_errors; + struct dentry *dma_tx_requested; + struct dentry *dma_tx_errors; + + struct dentry *isr_cmd_cmplt; + struct dentry *isr_fiqs; + struct dentry *isr_rx_headers; + struct dentry *isr_rx_mem_overflow; + struct dentry *isr_rx_rdys; + struct dentry *isr_irqs; + struct dentry *isr_tx_procs; + struct dentry *isr_decrypt_done; + struct dentry *isr_dma0_done; + struct dentry *isr_dma1_done; + struct dentry *isr_tx_exch_complete; + struct dentry *isr_commands; + struct dentry *isr_rx_procs; + struct dentry *isr_hw_pm_mode_changes; + struct dentry *isr_host_acknowledges; + struct dentry *isr_pci_pm; + struct dentry *isr_wakeups; + struct dentry *isr_low_rssi; + + struct dentry *wep_addr_key_count; + struct dentry *wep_default_key_count; + /* skipping wep.reserved */ + struct dentry *wep_key_not_found; + struct dentry *wep_decrypt_fail; + struct dentry *wep_packets; + struct dentry *wep_interrupt; + + struct dentry *pwr_ps_enter; + struct dentry *pwr_elp_enter; + struct dentry *pwr_missing_bcns; + struct dentry *pwr_wake_on_host; + struct dentry *pwr_wake_on_timer_exp; + struct dentry *pwr_tx_with_ps; + struct dentry *pwr_tx_without_ps; + struct dentry *pwr_rcvd_beacons; + struct dentry *pwr_power_save_off; + struct dentry *pwr_enable_ps; + struct dentry *pwr_disable_ps; + struct dentry *pwr_fix_tsf_ps; + /* skipping cont_miss_bcns_spread for now */ + struct dentry *pwr_rcvd_awake_beacons; + + struct dentry *mic_rx_pkts; + struct dentry *mic_calc_failure; + + struct dentry *aes_encrypt_fail; + struct dentry *aes_decrypt_fail; + struct dentry *aes_encrypt_packets; + struct dentry *aes_decrypt_packets; + struct dentry *aes_encrypt_interrupt; + struct dentry *aes_decrypt_interrupt; + + struct dentry *event_heart_beat; + struct dentry *event_calibration; + struct dentry *event_rx_mismatch; + struct dentry *event_rx_mem_empty; + struct dentry *event_rx_pool; + struct dentry *event_oom_late; + struct dentry *event_phy_transmit_error; + struct dentry *event_tx_stuck; + + struct dentry *ps_pspoll_timeouts; + struct dentry *ps_upsd_timeouts; + struct dentry *ps_upsd_max_sptime; + struct dentry *ps_upsd_max_apturn; + struct dentry *ps_pspoll_max_apturn; + struct dentry *ps_pspoll_utilization; + struct dentry *ps_upsd_utilization; + + struct dentry *rxpipe_rx_prep_beacon_drop; + struct dentry *rxpipe_descr_host_int_trig_rx_data; + struct dentry *rxpipe_beacon_buffer_thres_host_int_trig_rx_data; + struct dentry *rxpipe_missed_beacon_host_int_trig_rx_data; + struct dentry *rxpipe_tx_xfr_host_int_trig_rx_data; + + struct dentry *tx_queue_len; + + struct dentry *retry_count; + struct dentry *excessive_retries; +}; + +struct wl12xx { + struct ieee80211_hw *hw; + bool mac80211_registered; + + struct spi_device *spi; + + void (*set_power)(bool enable); + int irq; + + enum wl12xx_state state; + struct mutex mutex; + + int physical_mem_addr; + int physical_reg_addr; + int virtual_mem_addr; + int virtual_reg_addr; + + struct wl12xx_chip chip; + + int cmd_box_addr; + int event_box_addr; + struct boot_attr boot_attr; + + u8 *fw; + size_t fw_len; + u8 *nvs; + size_t nvs_len; + + u8 bssid[ETH_ALEN]; + u8 mac_addr[ETH_ALEN]; + u8 bss_type; + u8 listen_int; + int channel; + + void *target_mem_map; + struct acx_data_path_params_resp *data_path; + + /* Number of TX packets transferred to the FW, modulo 16 */ + u32 data_in_count; + + /* Frames scheduled for transmission, not handled yet */ + struct sk_buff_head tx_queue; + bool tx_queue_stopped; + + struct work_struct tx_work; + struct work_struct filter_work; + + /* Pending TX frames */ + struct sk_buff *tx_frames[16]; + + /* + * Index pointing to the next TX complete entry + * in the cyclic XT complete array we get from + * the FW. + */ + u32 next_tx_complete; + + /* FW Rx counter */ + u32 rx_counter; + + /* Rx frames handled */ + u32 rx_handled; + + /* Current double buffer */ + u32 rx_current_buffer; + u32 rx_last_id; + + /* The target interrupt mask */ + u32 intr_mask; + struct work_struct irq_work; + + /* The mbox event mask */ + u32 event_mask; + + /* Mailbox pointers */ + u32 mbox_ptr[2]; + + /* Are we currently scanning */ + bool scanning; + + /* Our association ID */ + u16 aid; + + /* Default key (for WEP) */ + u32 default_key; + + unsigned int tx_mgmt_frm_rate; + unsigned int tx_mgmt_frm_mod; + + unsigned int rx_config; + unsigned int rx_filter; + + /* is firmware in elp mode */ + bool elp; + + /* we can be in psm, but not in elp, we have to differentiate */ + bool psm; + + /* PSM mode requested */ + bool psm_requested; + + /* in dBm */ + int power_level; + + struct wl12xx_stats stats; + struct wl12xx_debugfs debugfs; +}; + +int wl12xx_plt_start(struct wl12xx *wl); +int wl12xx_plt_stop(struct wl12xx *wl); + +#define DEFAULT_HW_GEN_MODULATION_TYPE CCK_LONG /* Long Preamble */ +#define DEFAULT_HW_GEN_TX_RATE RATE_2MBPS +#define JOIN_TIMEOUT 5000 /* 5000 milliseconds to join */ + +#define WL12XX_DEFAULT_POWER_LEVEL 20 + +#define WL12XX_TX_QUEUE_MAX_LENGTH 20 + +/* Different chips need different sleep times after power on. WL1271 needs + * 200ms, WL1251 needs only 10ms. By default we use 200ms, but as soon as we + * know the chip ID, we change the sleep value in the wl12xx chip structure, + * so in subsequent power ons, we don't waste more time then needed. */ +#define WL12XX_DEFAULT_POWER_ON_SLEEP 200 + +#define CHIP_ID_1251_PG10 (0x7010101) +#define CHIP_ID_1251_PG11 (0x7020101) +#define CHIP_ID_1251_PG12 (0x7030101) +#define CHIP_ID_1271_PG10 (0x4030101) + +#endif diff --git a/drivers/net/wireless/wl12xx/wl12xx_80211.h b/drivers/net/wireless/wl12xx/wl12xx_80211.h new file mode 100644 index 00000000000..657c2dbcb7d --- /dev/null +++ b/drivers/net/wireless/wl12xx/wl12xx_80211.h @@ -0,0 +1,156 @@ +#ifndef __WL12XX_80211_H__ +#define __WL12XX_80211_H__ + +#include /* ETH_ALEN */ + +/* RATES */ +#define IEEE80211_CCK_RATE_1MB 0x02 +#define IEEE80211_CCK_RATE_2MB 0x04 +#define IEEE80211_CCK_RATE_5MB 0x0B +#define IEEE80211_CCK_RATE_11MB 0x16 +#define IEEE80211_OFDM_RATE_6MB 0x0C +#define IEEE80211_OFDM_RATE_9MB 0x12 +#define IEEE80211_OFDM_RATE_12MB 0x18 +#define IEEE80211_OFDM_RATE_18MB 0x24 +#define IEEE80211_OFDM_RATE_24MB 0x30 +#define IEEE80211_OFDM_RATE_36MB 0x48 +#define IEEE80211_OFDM_RATE_48MB 0x60 +#define IEEE80211_OFDM_RATE_54MB 0x6C +#define IEEE80211_BASIC_RATE_MASK 0x80 + +#define IEEE80211_CCK_RATE_1MB_MASK (1<<0) +#define IEEE80211_CCK_RATE_2MB_MASK (1<<1) +#define IEEE80211_CCK_RATE_5MB_MASK (1<<2) +#define IEEE80211_CCK_RATE_11MB_MASK (1<<3) +#define IEEE80211_OFDM_RATE_6MB_MASK (1<<4) +#define IEEE80211_OFDM_RATE_9MB_MASK (1<<5) +#define IEEE80211_OFDM_RATE_12MB_MASK (1<<6) +#define IEEE80211_OFDM_RATE_18MB_MASK (1<<7) +#define IEEE80211_OFDM_RATE_24MB_MASK (1<<8) +#define IEEE80211_OFDM_RATE_36MB_MASK (1<<9) +#define IEEE80211_OFDM_RATE_48MB_MASK (1<<10) +#define IEEE80211_OFDM_RATE_54MB_MASK (1<<11) + +#define IEEE80211_CCK_RATES_MASK 0x0000000F +#define IEEE80211_CCK_BASIC_RATES_MASK (IEEE80211_CCK_RATE_1MB_MASK | \ + IEEE80211_CCK_RATE_2MB_MASK) +#define IEEE80211_CCK_DEFAULT_RATES_MASK (IEEE80211_CCK_BASIC_RATES_MASK | \ + IEEE80211_CCK_RATE_5MB_MASK | \ + IEEE80211_CCK_RATE_11MB_MASK) + +#define IEEE80211_OFDM_RATES_MASK 0x00000FF0 +#define IEEE80211_OFDM_BASIC_RATES_MASK (IEEE80211_OFDM_RATE_6MB_MASK | \ + IEEE80211_OFDM_RATE_12MB_MASK | \ + IEEE80211_OFDM_RATE_24MB_MASK) +#define IEEE80211_OFDM_DEFAULT_RATES_MASK (IEEE80211_OFDM_BASIC_RATES_MASK | \ + IEEE80211_OFDM_RATE_9MB_MASK | \ + IEEE80211_OFDM_RATE_18MB_MASK | \ + IEEE80211_OFDM_RATE_36MB_MASK | \ + IEEE80211_OFDM_RATE_48MB_MASK | \ + IEEE80211_OFDM_RATE_54MB_MASK) +#define IEEE80211_DEFAULT_RATES_MASK (IEEE80211_OFDM_DEFAULT_RATES_MASK | \ + IEEE80211_CCK_DEFAULT_RATES_MASK) + + +/* This really should be 8, but not for our firmware */ +#define MAX_SUPPORTED_RATES 32 +#define COUNTRY_STRING_LEN 3 +#define MAX_COUNTRY_TRIPLETS 32 + +/* Headers */ +struct ieee80211_header { + __le16 frame_ctl; + __le16 duration_id; + u8 da[ETH_ALEN]; + u8 sa[ETH_ALEN]; + u8 bssid[ETH_ALEN]; + __le16 seq_ctl; + u8 payload[0]; +} __attribute__ ((packed)); + +struct wl12xx_ie_header { + u8 id; + u8 len; +} __attribute__ ((packed)); + +/* IEs */ + +struct wl12xx_ie_ssid { + struct wl12xx_ie_header header; + char ssid[IW_ESSID_MAX_SIZE]; +} __attribute__ ((packed)); + +struct wl12xx_ie_rates { + struct wl12xx_ie_header header; + u8 rates[MAX_SUPPORTED_RATES]; +} __attribute__ ((packed)); + +struct wl12xx_ie_ds_params { + struct wl12xx_ie_header header; + u8 channel; +} __attribute__ ((packed)); + +struct country_triplet { + u8 channel; + u8 num_channels; + u8 max_tx_power; +} __attribute__ ((packed)); + +struct wl12xx_ie_country { + struct wl12xx_ie_header header; + u8 country_string[COUNTRY_STRING_LEN]; + struct country_triplet triplets[MAX_COUNTRY_TRIPLETS]; +} __attribute__ ((packed)); + + +/* Templates */ + +struct wl12xx_beacon_template { + struct ieee80211_header header; + __le32 time_stamp[2]; + __le16 beacon_interval; + __le16 capability; + struct wl12xx_ie_ssid ssid; + struct wl12xx_ie_rates rates; + struct wl12xx_ie_rates ext_rates; + struct wl12xx_ie_ds_params ds_params; + struct wl12xx_ie_country country; +} __attribute__ ((packed)); + +struct wl12xx_null_data_template { + struct ieee80211_header header; +} __attribute__ ((packed)); + +struct wl12xx_ps_poll_template { + u16 fc; + u16 aid; + u8 bssid[ETH_ALEN]; + u8 ta[ETH_ALEN]; +} __attribute__ ((packed)); + +struct wl12xx_qos_null_data_template { + struct ieee80211_header header; + __le16 qos_ctl; +} __attribute__ ((packed)); + +struct wl12xx_probe_req_template { + struct ieee80211_header header; + struct wl12xx_ie_ssid ssid; + struct wl12xx_ie_rates rates; + struct wl12xx_ie_rates ext_rates; +} __attribute__ ((packed)); + + +struct wl12xx_probe_resp_template { + struct ieee80211_header header; + __le32 time_stamp[2]; + __le16 beacon_interval; + __le16 capability; + struct wl12xx_ie_ssid ssid; + struct wl12xx_ie_rates rates; + struct wl12xx_ie_rates ext_rates; + struct wl12xx_ie_ds_params ds_params; + struct wl12xx_ie_country country; +} __attribute__ ((packed)); + +#endif diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h new file mode 100644 index 00000000000..11430cab2aa --- /dev/null +++ b/include/linux/spi/wl12xx.h @@ -0,0 +1,31 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2009 Nokia Corporation + * + * Contact: Kalle Valo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef _LINUX_SPI_WL12XX_H +#define _LINUX_SPI_WL12XX_H + +struct wl12xx_platform_data { + void (*set_power)(bool enable); +}; + +#endif -- cgit v1.2.3-70-g09d2 From 9dfd6ba353b993d648dcda72480c7ce92cd27c7e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 May 2009 20:34:10 +0300 Subject: mac80211: Update SA Query transaction id length IEEE 802.11w/D8.0 changed the length of the SA Query transaction identifier from 16 to 2 octets. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c52e7fba4e4..dc92359f37e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -543,7 +543,7 @@ struct ieee80211_tim_ie { u8 virtual_map[1]; } __attribute__ ((packed)); -#define WLAN_SA_QUERY_TR_ID_LEN 16 +#define WLAN_SA_QUERY_TR_ID_LEN 2 struct ieee80211_mgmt { __le16 frame_control; -- cgit v1.2.3-70-g09d2 From 4d5b78c055c76bb563c4a43d2adb92735b3b9405 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 6 May 2009 16:52:51 -0700 Subject: net: Add missing rculist.h include to netdevice.h Otherwise list_for_each_entry_rcu() et al. aren't visible and we get build failures in some configurations. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02882e2ebd4..2af89b662ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -39,6 +39,7 @@ #include #include +#include #include #include -- cgit v1.2.3-70-g09d2 From 08ce4c91e44d51bb6c946f2756825a462d53c545 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Apr 2009 23:40:39 +0200 Subject: dlm: Make name input parameter of {,dlm_}new_lockspace() const | fs/gfs2/lock_dlm.c:207: warning: passing argument 1 of 'dlm_new_lockspace' discards qualifiers from pointer target type Signed-off-by: Geert Uytterhoeven Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 4 ++-- include/linux/dlm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index cd8e2df3c29..82528d9b72e 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -384,7 +384,7 @@ static void threads_stop(void) dlm_astd_stop(); } -static int new_lockspace(char *name, int namelen, void **lockspace, +static int new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { struct dlm_ls *ls; @@ -614,7 +614,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, return error; } -int dlm_new_lockspace(char *name, int namelen, void **lockspace, +int dlm_new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { int error = 0; diff --git a/include/linux/dlm.h b/include/linux/dlm.h index b9cd38603fd..0b3518c4235 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -81,8 +81,8 @@ struct dlm_lksb { * the cluster, the calling node joins it. */ -int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, - uint32_t flags, int lvblen); +int dlm_new_lockspace(const char *name, int namelen, + dlm_lockspace_t **lockspace, uint32_t flags, int lvblen); /* * dlm_release_lockspace -- cgit v1.2.3-70-g09d2 From f5f8d86b231e0489c33542c42afbb14d32411ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2009 07:08:38 +0000 Subject: tcp: tcp_prequeue() cleanup Small cleanup patch to reduce line lengths, before a change in tcp_prequeue(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b55b4891029..ac37228b700 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -890,30 +890,31 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - if (!sysctl_tcp_low_latency && tp->ucopy.task) { - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED); - } - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible(sk->sk_sleep); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4, - TCP_RTO_MAX); + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return 0; + + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); } - return 1; + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible(sk->sk_sleep); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4, + TCP_RTO_MAX); } - return 0; + return 1; } -- cgit v1.2.3-70-g09d2 From 7aedec2ad5314b173e78ca3f4edb4ceaa02248bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2009 07:20:39 +0000 Subject: tcp: tcp_prequeue() can use keyed wakeups We can avoid waking up tasks not interested in receive notifications, using wake_up_interruptible_poll() instead of wake_up_interruptible() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ac37228b700..87d210bb12a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -908,7 +908,8 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, + POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, (3 * TCP_RTO_MIN) / 4, -- cgit v1.2.3-70-g09d2 From 9080b72819650c3a757d173a19bc930d603b79d6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 21 Jan 2009 15:13:58 +0000 Subject: sh-sci: remove early_sci_setup() Remove unused early_sci_setup() function from sh-sci. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/serial/sh-sci.c | 14 -------------- include/linux/serial_sci.h | 2 -- 2 files changed, 16 deletions(-) (limited to 'include') diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index af5da110d52..5f37f7d3166 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -1082,20 +1082,6 @@ static void __init sci_init_ports(void) } } -int __init early_sci_setup(struct uart_port *port) -{ - if (unlikely(port->line > SCI_NPORTS)) - return -ENODEV; - - sci_init_ports(); - - sci_ports[port->line].port.membase = port->membase; - sci_ports[port->line].port.mapbase = port->mapbase; - sci_ports[port->line].port.type = port->type; - - return 0; -} - #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE /* * Print a string to the serial port trying not to disturb diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 893cc53486b..717059d6791 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -27,6 +27,4 @@ struct plat_sci_port { upf_t flags; /* UPF_* flags */ }; -int early_sci_setup(struct uart_port *port); - #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3-70-g09d2 From 501b825d01efb93766c87d29f299851152cf4eb0 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 21 Jan 2009 15:14:30 +0000 Subject: sh-sci: improve clock framework support Use enable/disable hooks for clock framework integration. Make sure we control the clock for the serial console as well. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/serial/sh-sci.c | 77 +++++++++++++++++++++++++++------------------- include/linux/serial_sci.h | 1 + 2 files changed, 47 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 408624ae7fe..3daf76725ac 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -76,8 +76,10 @@ struct sci_port { int break_flag; #ifdef CONFIG_HAVE_CLK - /* Port clock */ - struct clk *clk; + /* Interface clock */ + struct clk *iclk; + /* Data clock */ + struct clk *dclk; #endif struct list_head node; }; @@ -166,12 +168,12 @@ static void h8300_sci_config(struct uart_port *port, unsigned int ctrl) *mstpcrl &= ~mask; } -static inline void h8300_sci_enable(struct uart_port *port) +static void h8300_sci_enable(struct uart_port *port) { h8300_sci_config(port, sci_enable); } -static inline void h8300_sci_disable(struct uart_port *port) +static void h8300_sci_disable(struct uart_port *port) { h8300_sci_config(port, sci_disable); } @@ -742,13 +744,34 @@ static int sci_notifier(struct notifier_block *self, (phase == CPUFREQ_RESUMECHANGE)) { spin_lock_irqsave(&priv->lock, flags); list_for_each_entry(sci_port, &priv->ports, node) - sci_port->port.uartclk = clk_get_rate(sci_port->clk); + sci_port->port.uartclk = clk_get_rate(sci_port->dclk); spin_unlock_irqrestore(&priv->lock, flags); } return NOTIFY_OK; } + +static void sci_clk_enable(struct uart_port *port) +{ + struct sci_port *sci_port = to_sci_port(port); + + clk_enable(sci_port->dclk); + sci_port->port.uartclk = clk_get_rate(sci_port->dclk); + + if (sci_port->iclk) + clk_enable(sci_port->iclk); +} + +static void sci_clk_disable(struct uart_port *port) +{ + struct sci_port *sci_port = to_sci_port(port); + + if (sci_port->iclk) + clk_disable(sci_port->iclk); + + clk_disable(sci_port->dclk); +} #endif static int sci_request_irq(struct sci_port *port) @@ -880,10 +903,6 @@ static int sci_startup(struct uart_port *port) if (s->enable) s->enable(port); -#ifdef CONFIG_HAVE_CLK - s->clk = clk_get(NULL, "module_clk"); -#endif - sci_request_irq(s); sci_start_tx(port); sci_start_rx(port, 1); @@ -901,11 +920,6 @@ static void sci_shutdown(struct uart_port *port) if (s->disable) s->disable(port); - -#ifdef CONFIG_HAVE_CLK - clk_put(s->clk); - s->clk = NULL; -#endif } static void sci_set_termios(struct uart_port *port, struct ktermios *termios, @@ -1048,7 +1062,8 @@ static struct uart_ops sci_uart_ops = { #endif }; -static void __devinit sci_init_single(struct sci_port *sci_port, +static void __devinit sci_init_single(struct platform_device *dev, + struct sci_port *sci_port, unsigned int index, struct plat_sci_port *p) { @@ -1064,14 +1079,10 @@ static void __devinit sci_init_single(struct sci_port *sci_port, #endif sci_port->port.uartclk = CONFIG_CPU_CLOCK; #elif defined(CONFIG_HAVE_CLK) - /* - * XXX: We should use a proper SCI/SCIF clock - */ - { - struct clk *clk = clk_get(NULL, "module_clk"); - sci_port->port.uartclk = clk_get_rate(clk); - clk_put(clk); - } + sci_port->iclk = p->clk ? clk_get(&dev->dev, p->clk) : NULL; + sci_port->dclk = clk_get(&dev->dev, "module_clk"); + sci_port->enable = sci_clk_enable; + sci_port->disable = sci_clk_disable; #else #error "Need a valid uartclk" #endif @@ -1085,9 +1096,11 @@ static void __devinit sci_init_single(struct sci_port *sci_port, sci_port->port.irq = p->irqs[SCIx_TXI_IRQ]; sci_port->port.flags = p->flags; + sci_port->port.dev = &dev->dev; sci_port->type = sci_port->port.type = p->type; memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs)); + } #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE @@ -1111,14 +1124,21 @@ static void serial_console_write(struct console *co, const char *s, unsigned count) { struct uart_port *port = co->data; + struct sci_port *sci_port = to_sci_port(port); unsigned short bits; - uart_console_write(co->data, s, count, serial_console_putchar); + if (sci_port->enable) + sci_port->enable(port); + + uart_console_write(port, s, count, serial_console_putchar); /* wait until fifo is empty and last bit has been transmitted */ bits = SCxSR_TDxE(port) | SCxSR_TEND(port); while ((sci_in(port, SCxSR) & bits) != bits) cpu_relax(); + + if (sci_port->disable); + sci_port->disable(port); } static int __init serial_console_setup(struct console *co, char *options) @@ -1152,11 +1172,6 @@ static int __init serial_console_setup(struct console *co, char *options) if (!port->type) return -ENODEV; -#ifdef CONFIG_HAVE_CLK - if (!sci_port->clk) - sci_port->clk = clk_get(NULL, "module_clk"); -#endif - sci_config_port(port, 0); if (sci_port->enable) @@ -1171,6 +1186,7 @@ static int __init serial_console_setup(struct console *co, char *options) if (ret == 0) sci_stop_rx(port); #endif + /* TODO: disable clock */ return ret; } @@ -1250,8 +1266,7 @@ static int __devinit sci_probe_single(struct platform_device *dev, return 0; } - sciport->port.dev = &dev->dev; - sci_init_single(sciport, index, p); + sci_init_single(dev, sciport, index, p); ret = uart_add_one_port(&sci_uart_driver, &sciport->port); if (ret) diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 717059d6791..1c297ddc9d5 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -25,6 +25,7 @@ struct plat_sci_port { unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ unsigned int type; /* SCI / SCIF / IRDA */ upf_t flags; /* UPF_* flags */ + char *clk; /* clock string */ }; #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3-70-g09d2 From 3df5edad87a998273aa5a9a8c728c05d855ad00e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:22 +0200 Subject: perf_counter: rework ioctl()s Corey noticed that ioctl()s on grouped counters didn't work on the whole group. This extends the ioctl() interface to take a second argument that is interpreted as a flags field. We then provide PERF_IOC_FLAG_GROUP to toggle the behaviour. Having this flag gives the greatest flexibility, allowing you to individually enable/disable/reset counters in a group, or all together. [ Impact: fix group counter enable/disable semantics ] Reported-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090508170028.837558214@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++-- kernel/perf_counter.c | 104 ++++++++++++++++++++++++------------------- 2 files changed, 65 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 00081d84169..88f863ec274 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -157,10 +157,14 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_ENABLE _IOW('$', 0, u32) +#define PERF_COUNTER_IOC_DISABLE _IOW('$', 1, u32) #define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) +#define PERF_COUNTER_IOC_RESET _IOW('$', 3, u32) + +enum perf_counter_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; /* * Structure of the page that can be mapped via mmap diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fdb0d242127..f4883f1f47e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -82,7 +82,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) * add it straight to the context's counter list, or to the group * leader's sibling list: */ - if (counter->group_leader == counter) + if (group_leader == counter) list_add_tail(&counter->list_entry, &ctx->counter_list); else { list_add_tail(&counter->list_entry, &group_leader->sibling_list); @@ -385,24 +385,6 @@ static void perf_counter_disable(struct perf_counter *counter) spin_unlock_irq(&ctx->lock); } -/* - * Disable a counter and all its children. - */ -static void perf_counter_disable_family(struct perf_counter *counter) -{ - struct perf_counter *child; - - perf_counter_disable(counter); - - /* - * Lock the mutex to protect the list of children - */ - mutex_lock(&counter->mutex); - list_for_each_entry(child, &counter->child_list, child_list) - perf_counter_disable(child); - mutex_unlock(&counter->mutex); -} - static int counter_sched_in(struct perf_counter *counter, struct perf_cpu_context *cpuctx, @@ -753,24 +735,6 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh) return 0; } -/* - * Enable a counter and all its children. - */ -static void perf_counter_enable_family(struct perf_counter *counter) -{ - struct perf_counter *child; - - perf_counter_enable(counter); - - /* - * Lock the mutex to protect the list of children - */ - mutex_lock(&counter->mutex); - list_for_each_entry(child, &counter->child_list, child_list) - perf_counter_enable(child); - mutex_unlock(&counter->mutex); -} - void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx) { @@ -1307,31 +1271,79 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void perf_counter_reset(struct perf_counter *counter) { + (void)perf_counter_read(counter); atomic_set(&counter->count, 0); + perf_counter_update_userpage(counter); +} + +static void perf_counter_for_each_sibling(struct perf_counter *counter, + void (*func)(struct perf_counter *)) +{ + struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *sibling; + + spin_lock_irq(&ctx->lock); + counter = counter->group_leader; + + func(counter); + list_for_each_entry(sibling, &counter->sibling_list, list_entry) + func(sibling); + spin_unlock_irq(&ctx->lock); +} + +static void perf_counter_for_each_child(struct perf_counter *counter, + void (*func)(struct perf_counter *)) +{ + struct perf_counter *child; + + mutex_lock(&counter->mutex); + func(counter); + list_for_each_entry(child, &counter->child_list, child_list) + func(child); + mutex_unlock(&counter->mutex); +} + +static void perf_counter_for_each(struct perf_counter *counter, + void (*func)(struct perf_counter *)) +{ + struct perf_counter *child; + + mutex_lock(&counter->mutex); + perf_counter_for_each_sibling(counter, func); + list_for_each_entry(child, &counter->child_list, child_list) + perf_counter_for_each_sibling(child, func); + mutex_unlock(&counter->mutex); } static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; - int err = 0; + void (*func)(struct perf_counter *); + u32 flags = arg; switch (cmd) { case PERF_COUNTER_IOC_ENABLE: - perf_counter_enable_family(counter); + func = perf_counter_enable; break; case PERF_COUNTER_IOC_DISABLE: - perf_counter_disable_family(counter); - break; - case PERF_COUNTER_IOC_REFRESH: - err = perf_counter_refresh(counter, arg); + func = perf_counter_disable; break; case PERF_COUNTER_IOC_RESET: - perf_counter_reset(counter); + func = perf_counter_reset; break; + + case PERF_COUNTER_IOC_REFRESH: + return perf_counter_refresh(counter, arg); default: - err = -ENOTTY; + return -ENOTTY; } - return err; + + if (flags & PERF_IOC_FLAG_GROUP) + perf_counter_for_each(counter, func); + else + perf_counter_for_each_child(counter, func); + + return 0; } /* -- cgit v1.2.3-70-g09d2 From a85f61abe11a46553c4562e74edb27ebc782aeb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:23 +0200 Subject: perf_counter: add PERF_RECORD_CONFIG Much like CONFIG_RECORD_GROUP records the hw_event.config to identify the values, allow to record this for all counters. [ Impact: extend perfcounter output record format ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090508170028.923228280@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 88f863ec274..0e6303d36c6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -104,6 +104,7 @@ enum perf_counter_record_format { PERF_RECORD_ADDR = 1U << 3, PERF_RECORD_GROUP = 1U << 4, PERF_RECORD_CALLCHAIN = 1U << 5, + PERF_RECORD_CONFIG = 1U << 6, }; /* @@ -258,6 +259,7 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_RECORD_TID * { u64 time; } && PERF_RECORD_TIME * { u64 addr; } && PERF_RECORD_ADDR + * { u64 config; } && PERF_RECORD_CONFIG * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f4883f1f47e..c615f52aa40 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1994,6 +1994,11 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } + if (record_type & PERF_RECORD_CONFIG) { + header.type |= PERF_RECORD_CONFIG; + header.size += sizeof(u64); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -2029,6 +2034,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_ADDR) perf_output_put(&handle, addr); + if (record_type & PERF_RECORD_CONFIG) + perf_output_put(&handle, counter->hw_event.config); + /* * XXX PERF_RECORD_GROUP vs inherited counters seems difficult. */ -- cgit v1.2.3-70-g09d2 From f370e1e2f195ec1e6420e26fc83e0319595db578 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2009 18:52:24 +0200 Subject: perf_counter: add PERF_RECORD_CPU Allow recording the CPU number the event was generated on. RFC: this leaves a u32 as reserved, should we fill in the node_id() there, or leave this open for future extention, as userspace can already easily do the cpu->node mapping if needed. [ Impact: extend perfcounter output record format ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090508170029.008627711@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0e6303d36c6..614f921d616 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -105,6 +105,7 @@ enum perf_counter_record_format { PERF_RECORD_GROUP = 1U << 4, PERF_RECORD_CALLCHAIN = 1U << 5, PERF_RECORD_CONFIG = 1U << 6, + PERF_RECORD_CPU = 1U << 7, }; /* @@ -260,6 +261,7 @@ enum perf_event_type { * { u64 time; } && PERF_RECORD_TIME * { u64 addr; } && PERF_RECORD_ADDR * { u64 config; } && PERF_RECORD_CONFIG + * { u32 cpu, res; } && PERF_RECORD_CPU * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c615f52aa40..d850a1fb8d4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1956,6 +1956,9 @@ static void perf_counter_output(struct perf_counter *counter, struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; u64 time; + struct { + u32 cpu, reserved; + } cpu_entry; header.type = 0; header.size = sizeof(header); @@ -1999,6 +2002,13 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } + if (record_type & PERF_RECORD_CPU) { + header.type |= PERF_RECORD_CPU; + header.size += sizeof(cpu_entry); + + cpu_entry.cpu = raw_smp_processor_id(); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -2037,6 +2047,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_CONFIG) perf_output_put(&handle, counter->hw_event.config); + if (record_type & PERF_RECORD_CPU) + perf_output_put(&handle, cpu_entry); + /* * XXX PERF_RECORD_GROUP vs inherited counters seems difficult. */ -- cgit v1.2.3-70-g09d2 From 4671c79408a3f8a5a6a45e39c4c164dada3a5678 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 8 May 2009 16:27:41 -0400 Subject: tracing: add trace_set_clr_event to export event enabling function Other parts of the kernel may need to be able to enable or disable specific events. Especially parts that create trace events. [ Impact: allow enabling of trace events by those that create the event ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 ++ kernel/trace/trace_events.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 662c1becf36..bae51ddfabd 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -127,6 +127,8 @@ extern int trace_define_field(struct ftrace_event_call *call, char *type, #define is_signed_type(type) (((type)(-1)) < 0) +int trace_set_clr_event(const char *system, const char *event, int set); + /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2eecb87e42d..0eec0c55dd8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -177,6 +177,23 @@ static int ftrace_set_clr_event(char *buf, int set) return __ftrace_set_clr_event(match, sub, event, set); } +/** + * trace_set_clr_event - enable or disable an event + * @system: system name to match (NULL for any system) + * @event: event name to match (NULL for all events, within system) + * @set: 1 to enable, 0 to disable + * + * This is a way for other parts of the kernel to enable or disable + * event recording. + * + * Returns 0 on success, -EINVAL if the parameters do not match any + * registered events. + */ +int trace_set_clr_event(const char *system, const char *event, int set) +{ + return __ftrace_set_clr_event(NULL, system, event, set); +} + /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 -- cgit v1.2.3-70-g09d2 From 7e044e056a6aa0dc695db50461d7b326fde15e8b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 9 May 2009 16:08:05 -0700 Subject: Input: serio - do not use deprecated dev.power.power_state Signed-off-by: Dmitry Torokhov --- drivers/input/gameport/gameport.c | 6 +++--- drivers/input/serio/i8042.c | 17 +++++++++-------- drivers/input/serio/serio.c | 37 +++++++++++++++++++------------------ include/linux/gameport.h | 3 ++- include/linux/serio.h | 9 ++++++--- 5 files changed, 39 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index 0279d6983cc..ac11be08585 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -723,7 +723,7 @@ int __gameport_register_driver(struct gameport_driver *drv, struct module *owner * Temporarily disable automatic binding because probing * takes long time and we are better off doing it in kgameportd */ - drv->ignore = 1; + drv->ignore = true; error = driver_register(&drv->driver); if (error) { @@ -736,7 +736,7 @@ int __gameport_register_driver(struct gameport_driver *drv, struct module *owner /* * Reset ignore flag and let kgameportd bind the driver to free ports */ - drv->ignore = 0; + drv->ignore = false; error = gameport_queue_event(drv, NULL, GAMEPORT_ATTACH_DRIVER); if (error) { driver_unregister(&drv->driver); @@ -753,7 +753,7 @@ void gameport_unregister_driver(struct gameport_driver *drv) mutex_lock(&gameport_mutex); - drv->ignore = 1; /* so gameport_find_driver ignores it */ + drv->ignore = true; /* so gameport_find_driver ignores it */ gameport_remove_pending_events(drv); start_over: diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 3cffb704e37..f919bf57293 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -10,6 +10,7 @@ * the Free Software Foundation. */ +#include #include #include #include @@ -921,6 +922,9 @@ static void i8042_dritek_enable(void) #endif #ifdef CONFIG_PM + +static bool i8042_suspended; + /* * Here we try to restore the original BIOS settings. We only want to * do that once, when we really suspend, not when we taking memory @@ -930,11 +934,9 @@ static void i8042_dritek_enable(void) static int i8042_suspend(struct platform_device *dev, pm_message_t state) { - if (dev->dev.power.power_state.event != state.event) { - if (state.event == PM_EVENT_SUSPEND) - i8042_controller_reset(); - - dev->dev.power.power_state = state; + if (!i8042_suspended && state.event == PM_EVENT_SUSPEND) { + i8042_controller_reset(); + i8042_suspended = true; } return 0; @@ -952,7 +954,7 @@ static int i8042_resume(struct platform_device *dev) /* * Do not bother with restoring state if we haven't suspened yet */ - if (dev->dev.power.power_state.event == PM_EVENT_ON) + if (!i8042_suspended) return 0; error = i8042_controller_check(); @@ -998,10 +1000,9 @@ static int i8042_resume(struct platform_device *dev) if (i8042_ports[I8042_KBD_PORT_NO].serio) i8042_enable_kbd_port(); + i8042_suspended = false; i8042_interrupt(0, NULL); - dev->dev.power.power_state = PMSG_ON; - return 0; } #endif /* CONFIG_PM */ diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 8d2df5d2d5a..fb17573f8f2 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -495,9 +495,9 @@ static ssize_t serio_set_bind_mode(struct device *dev, struct device_attribute * retval = count; if (!strncmp(buf, "manual", count)) { - serio->manual_bind = 1; + serio->manual_bind = true; } else if (!strncmp(buf, "auto", count)) { - serio->manual_bind = 0; + serio->manual_bind = false; } else { retval = -EINVAL; } @@ -570,7 +570,7 @@ static void serio_add_port(struct serio *serio) "serio: device_add() failed for %s (%s), error: %d\n", serio->phys, serio->name, error); else { - serio->registered = 1; + serio->registered = true; error = sysfs_create_group(&serio->dev.kobj, &serio_id_attr_group); if (error) printk(KERN_ERR @@ -606,7 +606,7 @@ static void serio_destroy_port(struct serio *serio) if (serio->registered) { sysfs_remove_group(&serio->dev.kobj, &serio_id_attr_group); device_del(&serio->dev); - serio->registered = 0; + serio->registered = false; } list_del_init(&serio->node); @@ -750,9 +750,9 @@ static ssize_t serio_driver_set_bind_mode(struct device_driver *drv, const char retval = count; if (!strncmp(buf, "manual", count)) { - serio_drv->manual_bind = 1; + serio_drv->manual_bind = true; } else if (!strncmp(buf, "auto", count)) { - serio_drv->manual_bind = 0; + serio_drv->manual_bind = false; } else { retval = -EINVAL; } @@ -812,7 +812,7 @@ static void serio_attach_driver(struct serio_driver *drv) int __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name) { - int manual_bind = drv->manual_bind; + bool manual_bind = drv->manual_bind; int error; drv->driver.bus = &serio_bus; @@ -823,7 +823,7 @@ int __serio_register_driver(struct serio_driver *drv, struct module *owner, cons * Temporarily disable automatic binding because probing * takes long time and we are better off doing it in kseriod */ - drv->manual_bind = 1; + drv->manual_bind = true; error = driver_register(&drv->driver); if (error) { @@ -838,7 +838,7 @@ int __serio_register_driver(struct serio_driver *drv, struct module *owner, cons * driver to free ports */ if (!manual_bind) { - drv->manual_bind = 0; + drv->manual_bind = false; error = serio_queue_event(drv, NULL, SERIO_ATTACH_DRIVER); if (error) { driver_unregister(&drv->driver); @@ -856,7 +856,7 @@ void serio_unregister_driver(struct serio_driver *drv) mutex_lock(&serio_mutex); - drv->manual_bind = 1; /* so serio_find_driver ignores it */ + drv->manual_bind = true; /* so serio_find_driver ignores it */ serio_remove_pending_events(drv); start_over: @@ -933,11 +933,11 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env) #ifdef CONFIG_PM static int serio_suspend(struct device *dev, pm_message_t state) { - if (dev->power.power_state.event != state.event) { - if (state.event == PM_EVENT_SUSPEND) - serio_cleanup(to_serio_port(dev)); + struct serio *serio = to_serio_port(dev); - dev->power.power_state = state; + if (!serio->suspended && state.event == PM_EVENT_SUSPEND) { + serio_cleanup(serio); + serio->suspended = true; } return 0; @@ -945,14 +945,15 @@ static int serio_suspend(struct device *dev, pm_message_t state) static int serio_resume(struct device *dev) { + struct serio *serio = to_serio_port(dev); + /* * Driver reconnect can take a while, so better let kseriod * deal with it. */ - if (dev->power.power_state.event != PM_EVENT_ON) { - dev->power.power_state = PMSG_ON; - serio_queue_event(to_serio_port(dev), NULL, - SERIO_RECONNECT_PORT); + if (serio->suspended) { + serio->suspended = false; + serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); } return 0; diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 0cd825f7363..1bc08541c2b 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -11,6 +11,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -62,7 +63,7 @@ struct gameport_driver { struct device_driver driver; - unsigned int ignore; + bool ignore; }; #define to_gameport_driver(d) container_of(d, struct gameport_driver, driver) diff --git a/include/linux/serio.h b/include/linux/serio.h index e0417e4d3f1..126d24c9eaa 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -15,6 +15,7 @@ #ifdef __KERNEL__ +#include #include #include #include @@ -28,7 +29,10 @@ struct serio { char name[32]; char phys[32]; - unsigned int manual_bind; + bool manual_bind; + bool registered; /* port has been fully registered with driver core */ + bool suspended; /* port is suspended */ + struct serio_device_id id; @@ -47,7 +51,6 @@ struct serio { struct mutex drv_mutex; /* protects serio->drv so attributes can pin driver */ struct device dev; - unsigned int registered; /* port has been fully registered with driver core */ struct list_head node; }; @@ -58,7 +61,7 @@ struct serio_driver { char *description; struct serio_device_id *id_table; - unsigned int manual_bind; + bool manual_bind; void (*write_wakeup)(struct serio *); irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int); -- cgit v1.2.3-70-g09d2 From 4c25a2c1b90bf785fc2e2f0f0c74a80b3e070d39 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 17:16:06 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. context flushing. It really doesn't make a lot of sense to have some of the logic to handle caching vs. non-caching mode duplicated in qi_flush_context() and __iommu_flush_context(), while the return value indicates whether the caller should take other action which depends on the same thing. Especially since qi_flush_context() thought it was returning something entirely different anyway. This patch makes qi_flush_context() and __iommu_flush_context() both return void, removes the 'non_present_entry_flush' argument and makes the only call site which _set_ that argument to 1 do the right thing. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 13 +++--------- drivers/pci/intel-iommu.c | 52 ++++++++++++++++++--------------------------- include/linux/intel-iommu.h | 8 +++---- 3 files changed, 28 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index d3d86b749ee..10a071ba323 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -723,23 +723,16 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } -int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush) +void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type) { struct qi_desc desc; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d6f4ee50924..9f5d9151edc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -857,26 +857,13 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_context(struct intel_iommu *iommu, - u16 did, u16 source_id, u8 function_mask, u64 type, - int non_present_entry_flush) +static void __iommu_flush_context(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, + u64 type) { u64 val = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_CCMD_GLOBAL_INVL: val = DMA_CCMD_GLOBAL_INVL; @@ -901,9 +888,6 @@ static int __iommu_flush_context(struct intel_iommu *iommu, dmar_readq, (!(val & DMA_CCMD_ICC)), val); spin_unlock_irqrestore(&iommu->register_lock, flag); - - /* flush context entry will implicitly flush write buffer */ - return 0; } /* return value determine if we need a write buffer flush */ @@ -1428,14 +1412,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); - /* it's a non-present to present mapping */ - if (iommu->flush.flush_context(iommu, id, - (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL, 1)) - iommu_flush_write_buffer(iommu); - else + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); - + } else { + iommu_flush_write_buffer(iommu); + } spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1566,7 +1557,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); } @@ -2104,8 +2095,7 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, - 0); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); iommu_disable_protect_mem_regions(iommu); @@ -2721,7 +2711,7 @@ static int init_iommu_hw(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); iommu_disable_protect_mem_regions(iommu); @@ -2738,7 +2728,7 @@ static void iommu_flush_all(void) for_each_active_iommu(iommu, drhd) { iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 7246971a7fe..f2b94dafbf3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -281,8 +281,8 @@ struct ir_table { #endif struct iommu_flush { - int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush); + void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); }; @@ -339,8 +339,8 @@ extern void dmar_disable_qi(struct intel_iommu *iommu); extern int dmar_reenable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); -extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type, int non_present_entry_flush); +extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -- cgit v1.2.3-70-g09d2 From 1f0ef2aa18802a8ce7eb5a5164aaaf4d59073801 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 19:58:49 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing. As we just did for context cache flushing, clean up the logic around whether we need to flush the iotlb or just the write-buffer, depending on caching mode. Fix the same bug in qi_flush_iotlb() that qi_flush_context() had -- it isn't supposed to be returning an error; it's supposed to be returning a flag which triggers a write-buffer flush. Remove some superfluous conditional write-buffer flushes which could never have happened because they weren't for non-present-to-present mapping changes anyway. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 14 ++------ drivers/pci/intel-iommu.c | 78 +++++++++++++++++---------------------------- include/linux/intel-iommu.h | 9 +++--- 3 files changed, 37 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 10a071ba323..df6af0d4ec0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -735,22 +735,14 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, qi_submit_sync(&desc, iommu); } -int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush) +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { u8 dw = 0, dr = 0; struct qi_desc desc; int ih = 0; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - if (cap_write_drain(iommu->cap)) dw = 1; @@ -762,7 +754,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } /* diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9f5d9151edc..f47d04aced8 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -891,27 +891,13 @@ static void __iommu_flush_context(struct intel_iommu *iommu, } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type, - int non_present_entry_flush) +static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_TLB_GLOBAL_FLUSH: /* global flush doesn't need set IVA_REG */ @@ -959,12 +945,10 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", (unsigned long long)DMA_TLB_IIRG(type), (unsigned long long)DMA_TLB_IAIG(val)); - /* flush iotlb entry will implicitly flush write buffer */ - return 0; } -static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages, int non_present_entry_flush) +static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int pages) { unsigned int mask; @@ -974,8 +958,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if no PSI support */ if (!cap_pgsel_inv(iommu->cap)) return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, - non_present_entry_flush); + DMA_TLB_DSI_FLUSH); /* * PSI requires page size to be 2 ^ x, and the base address is naturally @@ -985,11 +968,10 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, /* Fallback to domain selective flush if size is too big */ if (mask > cap_max_amask_val(iommu->cap)) return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, non_present_entry_flush); + DMA_TLB_DSI_FLUSH); return iommu->flush.flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH, - non_present_entry_flush); + DMA_TLB_PSI_FLUSH); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1423,7 +1405,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); } else { iommu_flush_write_buffer(iommu); } @@ -1558,8 +1540,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -2096,8 +2077,7 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, - 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); @@ -2244,10 +2224,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (ret) goto error; - /* it's a non-present to present mapping */ - ret = iommu_flush_iotlb_psi(iommu, domain->id, - start_paddr, size >> VTD_PAGE_SHIFT, 1); - if (ret) + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_paddr, + size >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); return start_paddr + ((u64)paddr & (~PAGE_MASK)); @@ -2283,7 +2264,7 @@ static void flush_unmaps(void) if (deferred_flush[i].next) { iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -2362,9 +2343,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { - if (iommu_flush_iotlb_psi(iommu, - domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2455,9 +2435,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); - if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2549,10 +2528,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne offset += size; } - /* it's a non-present to present mapping */ - if (iommu_flush_iotlb_psi(iommu, domain->id, - start_addr, offset >> VTD_PAGE_SHIFT, 1)) + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_addr, + offset >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); + return nelems; } @@ -2711,9 +2693,9 @@ static int init_iommu_hw(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); iommu_enable_translation(iommu); } @@ -2728,9 +2710,9 @@ static void iommu_flush_all(void) for_each_active_iommu(iommu, drhd) { iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); } } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f2b94dafbf3..29e05a034c0 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -283,8 +283,8 @@ struct ir_table { struct iommu_flush { void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); - int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, int non_present_entry_flush); + void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); }; enum { @@ -341,9 +341,8 @@ extern void qi_global_iec(struct intel_iommu *iommu); extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); -extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush); +extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From 5e751e992f3fb08ba35e1ca8095ec8fbf9eda523 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 8 May 2009 13:55:22 +0100 Subject: CRED: Rename cred_exec_mutex to reflect that it's a guard against ptrace Rename cred_exec_mutex to reflect that it's a guard against foreign intervention on a process's credential state, such as is made by ptrace(). The attachment of a debugger to a process affects execve()'s calculation of the new credential state - _and_ also setprocattr()'s calculation of that state. Signed-off-by: David Howells Signed-off-by: James Morris --- fs/compat.c | 6 +++--- fs/exec.c | 10 +++++----- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 4 +++- kernel/cred.c | 4 ++-- kernel/ptrace.c | 9 +++++---- 6 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 681ed81e6be..bb2a9b2e817 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1488,7 +1488,7 @@ int compat_do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + retval = mutex_lock_interruptible(¤t->cred_guard_mutex); if (retval < 0) goto out_free; current->in_execve = 1; @@ -1550,7 +1550,7 @@ int compat_do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1573,7 +1573,7 @@ out_unmark: out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/fs/exec.c b/fs/exec.c index 639177b0eea..998e856c307 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1045,7 +1045,7 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; - /* cred_exec_mutex must be held at least to this point to prevent + /* cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's * credentials; any time after this it may be unlocked */ @@ -1055,7 +1055,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_exec_mutex to protect against + * - the caller must hold current->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) @@ -1297,7 +1297,7 @@ int do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + retval = mutex_lock_interruptible(¤t->cred_guard_mutex); if (retval < 0) goto out_free; current->in_execve = 1; @@ -1360,7 +1360,7 @@ int do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1383,7 +1383,7 @@ out_unmark: out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641..7f54ba94242 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -145,8 +145,8 @@ extern struct cred init_cred; .group_leader = &tsk, \ .real_cred = &init_cred, \ .cred = &init_cred, \ - .cred_exec_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3fa82b353c9..5932ace2240 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1247,7 +1247,9 @@ struct task_struct { * credentials (COW) */ const struct cred *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock diff --git a/kernel/cred.c b/kernel/cred.c index 3a039189d70..1bb4d7e5d61 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_exec_mutex + * - The caller must hold current->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_exec_mutex); + mutex_init(&p->cred_guard_mutex); if ( #ifdef CONFIG_KEYS diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0692ab5a0d6..27ac80298bf 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -185,10 +185,11 @@ int ptrace_attach(struct task_struct *task) if (same_thread_group(task, current)) goto out; - /* Protect exec's credential calculations against our interference; - * SUID, SGID and LSM creds get determined differently under ptrace. + /* Protect the target's credential calculations against our + * interference; SUID, SGID and LSM creds get determined differently + * under ptrace. */ - retval = mutex_lock_interruptible(&task->cred_exec_mutex); + retval = mutex_lock_interruptible(&task->cred_guard_mutex); if (retval < 0) goto out; @@ -232,7 +233,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); - mutex_unlock(&task->cred_exec_mutex); + mutex_unlock(&task->cred_guard_mutex); out: return retval; } -- cgit v1.2.3-70-g09d2 From 7303f240981888884412a97ac742772527356880 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 11 May 2009 09:59:34 +0300 Subject: slob: use PG_slab for identifying SLOB pages For the sake of consistency. Signed-off-by: Wu Fengguang Cc: KOSAKI Motohiro Cc: Andi Kleen Acked-by: Matt Mackall Cc: Alexey Dobriyan Cc: Ingo Molnar Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Pekka Enberg --- include/linux/page-flags.h | 2 -- mm/slob.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 62214c7d2d9..f036dfbdad5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -120,7 +120,6 @@ enum pageflags { PG_savepinned = PG_dirty, /* SLOB */ - PG_slob_page = PG_active, PG_slob_free = PG_private, /* SLUB */ @@ -203,7 +202,6 @@ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) -__PAGEFLAG(SlobPage, slob_page) __PAGEFLAG(SlobFree, slob_free) __PAGEFLAG(SlubFrozen, slub_frozen) diff --git a/mm/slob.c b/mm/slob.c index a2d4ab32198..aad9dad2e82 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -131,17 +131,17 @@ static LIST_HEAD(free_slob_large); */ static inline int is_slob_page(struct slob_page *sp) { - return PageSlobPage((struct page *)sp); + return PageSlab((struct page *)sp); } static inline void set_slob_page(struct slob_page *sp) { - __SetPageSlobPage((struct page *)sp); + __SetPageSlab((struct page *)sp); } static inline void clear_slob_page(struct slob_page *sp) { - __ClearPageSlobPage((struct page *)sp); + __ClearPageSlab((struct page *)sp); } static inline struct slob_page *slob_page(const void *addr) -- cgit v1.2.3-70-g09d2 From c3a4d78c580de4edc9ef0f7c59812fb02ceb037f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:37 +0900 Subject: block: add rq->resid_len rq->data_len served two purposes - the length of data buffer on issue and the residual count on completion. This duality creates some headaches. First of all, block layer and low level drivers can't really determine what rq->data_len contains while a request is executing. It could be the total request length or it coulde be anything else one of the lower layers is using to keep track of residual count. This complicates things because blk_rq_bytes() and thus [__]blk_end_request_all() relies on rq->data_len for PC commands. Drivers which want to report residual count should first cache the total request length, update rq->data_len and then complete the request with the cached data length. Secondly, it makes requests default to reporting full residual count, ie. reporting that no data transfer occurred. The residual count is an exception not the norm; however, the driver should clear rq->data_len to zero to signify the normal cases while leaving it alone means no data transfer occurred at all. This reverse default behavior complicates code unnecessarily and renders block PC on some drivers (ide-tape/floppy) unuseable. This patch adds rq->resid_len which is used only for residual count. While at it, remove now unnecessasry blk_rq_bytes() caching in ide_pc_intr() as rq->data_len is not changed anymore. Boaz : spotted missing conversion in osd Sergei : spotted too early conversion to blk_rq_bytes() in ide-tape [ Impact: cleanup residual count handling, report 0 resid by default ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Mike Miller Cc: Eric Moore Cc: Alan Stern Cc: FUJITA Tomonori Cc: Doug Gilbert Cc: Mike Miller Cc: Eric Moore Cc: Darrick J. Wong Cc: Pete Zaitcev Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- block/bsg.c | 8 +++---- block/scsi_ioctl.c | 2 +- drivers/block/cciss.c | 13 ++++------- drivers/block/ub.c | 6 ++--- drivers/ide/ide-atapi.c | 9 +------- drivers/ide/ide-cd.c | 13 +++++------ drivers/ide/ide-tape.c | 4 ++-- drivers/message/fusion/mptsas.c | 3 +-- drivers/scsi/libsas/sas_expander.c | 6 +---- drivers/scsi/libsas/sas_host_smp.c | 38 +++++++++++++++++--------------- drivers/scsi/mpt2sas/mpt2sas_transport.c | 4 +--- drivers/scsi/scsi_lib.c | 24 ++++++++++---------- drivers/scsi/sg.c | 2 +- drivers/scsi/st.c | 2 +- fs/exofs/osd.c | 4 ++-- include/linux/blkdev.h | 1 + 16 files changed, 59 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/block/bsg.c b/block/bsg.c index 206060e795d..2d746e34f4c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -445,14 +445,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->data_len; - hdr->din_resid = rq->next_rq->data_len; + hdr->dout_resid = rq->resid_len; + hdr->din_resid = rq->next_rq->resid_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->data_len; + hdr->din_resid = rq->resid_len; else - hdr->dout_resid = rq->data_len; + hdr->dout_resid = rq->resid_len; /* * If the request generated a negative error number, return it diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 58cf4560f74..a9670dd4b5d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -230,7 +230,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->data_len; + hdr->resid = rq->resid_len; hdr->sb_len_wr = 0; if (rq->sense_len && hdr->sbp) { diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4d4d5e0d3fa..f22d4932433 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1299,7 +1299,6 @@ static void cciss_softirq_done(struct request *rq) { CommandList_struct *cmd = rq->completion_data; ctlr_info_t *h = hba[cmd->ctlr]; - unsigned int nr_bytes; unsigned long flags; u64bit temp64; int i, ddir; @@ -1321,15 +1320,11 @@ static void cciss_softirq_done(struct request *rq) printk("Done with %p\n", rq); #endif /* CCISS_DEBUG */ - /* - * Store the full size and set the residual count for pc requests - */ - nr_bytes = blk_rq_bytes(rq); + /* set the residual count for pc requests */ if (blk_pc_request(rq)) - rq->data_len = cmd->err_info->ResidualCnt; + rq->resid_len = cmd->err_info->ResidualCnt; - if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, nr_bytes)) - BUG(); + blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); spin_lock_irqsave(&h->lock, flags); cmd_free(h, cmd, 1); @@ -2691,7 +2686,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, printk(KERN_WARNING "cciss: cmd %p has" " completed with data underrun " "reported\n", cmd); - cmd->rq->data_len = cmd->err_info->ResidualCnt; + cmd->rq->resid_len = cmd->err_info->ResidualCnt; } break; case CMD_DATA_OVERRUN: diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 689cd27ac89..8c2cc71327e 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -783,10 +783,8 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) if (cmd->error == 0) { if (blk_pc_request(rq)) { - if (cmd->act_len >= rq->data_len) - rq->data_len = 0; - else - rq->data_len -= cmd->act_len; + if (cmd->act_len < rq->data_len) + rq->resid_len = rq->data_len - cmd->act_len; scsi_status = 0; } else { if (cmd->act_len != cmd->len) { diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index afe5a432387..e4a02a05fc8 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -367,7 +367,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* No more interrupts */ if ((stat & ATA_DRQ) == 0) { int uptodate, error; - unsigned int done; debug_log("Packet command completed, %d bytes transferred\n", pc->xferred); @@ -406,12 +405,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) dsc = 1; - /* - * ->pc_callback() might change rq->data_len for - * residual count, cache total length. - */ - done = blk_rq_bytes(rq); - /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); @@ -431,7 +424,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) error = uptodate ? 0 : -EIO; } - ide_complete_rq(drive, error, done); + ide_complete_rq(drive, error, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 673628790f1..8bbe222c5e4 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -519,7 +519,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, error = blk_execute_rq(drive->queue, info->disk, rq, 0); if (buffer) - *bufflen = rq->data_len; + *bufflen = rq->resid_len; flags = rq->cmd_flags; blk_put_request(rq); @@ -707,11 +707,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) out_end: if (blk_pc_request(rq) && rc == 0) { - unsigned int dlen = rq->data_len; - - rq->data_len = 0; - - if (blk_end_request(rq, 0, dlen)) + if (blk_end_request(rq, 0, rq->data_len)) BUG(); hwif->rq = NULL; @@ -740,9 +736,10 @@ out_end: nsectors = 1; if (blk_fs_request(rq) == 0) { - rq->data_len -= (cmd->nbytes - cmd->nleft); + rq->resid_len = rq->data_len - + (cmd->nbytes - cmd->nleft); if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE)) - rq->data_len += cmd->last_xfer_len; + rq->resid_len += cmd->last_xfer_len; } ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 7149224d1fe..65c5b705883 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -380,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->data_len -= blocks * tape->blk_size; + rq->resid_len = rq->data_len - blocks * tape->blk_size; if (pc->error) { uptodate = 0; @@ -903,7 +903,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) blk_execute_rq(drive->queue, tape->disk, rq, 0); /* calculate the number of transferred bytes and update buffer state */ - size -= rq->data_len; + size -= rq->resid_len; tape->cur = tape->buf; if (cmd == REQ_IDETAPE_READ) tape->valid = size; diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index a9019f081b9..5d5f34715de 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1357,8 +1357,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply; memcpy(req->sense, smprep, sizeof(*smprep)); req->sense_len = sizeof(*smprep); - req->data_len = 0; - rsp->data_len -= smprep->ResponseDataLength; + rsp->resid_len = rsp->data_len - smprep->ResponseDataLength; } else { printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n", ioc->name, __func__); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 3da02e43678..6605ec905cc 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1936,12 +1936,8 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, bio_data(rsp->bio), rsp->data_len); if (ret > 0) { /* positive number is the untransferred residual */ - rsp->data_len = ret; - req->data_len = 0; + rsp->resid_len = ret; ret = 0; - } else if (ret == 0) { - rsp->data_len = 0; - req->data_len = 0; } return ret; diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c index d110a366c48..89952edd0be 100644 --- a/drivers/scsi/libsas/sas_host_smp.c +++ b/drivers/scsi/libsas/sas_host_smp.c @@ -134,7 +134,7 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, { u8 *req_data = NULL, *resp_data = NULL, *buf; struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); - int error = -EINVAL, resp_data_len = rsp->data_len; + int error = -EINVAL; /* eight is the minimum size for request and response frames */ if (req->data_len < 8 || rsp->data_len < 8) @@ -176,17 +176,20 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, resp_data[1] = req_data[1]; resp_data[2] = SMP_RESP_FUNC_UNK; + req->resid_len = req->data_len; + rsp->resid_len = rsp->data_len; + switch (req_data[1]) { case SMP_REPORT_GENERAL: - req->data_len -= 8; - resp_data_len -= 32; + req->resid_len -= 8; + rsp->resid_len -= 32; resp_data[2] = SMP_RESP_FUNC_ACC; resp_data[9] = sas_ha->num_phys; break; case SMP_REPORT_MANUF_INFO: - req->data_len -= 8; - resp_data_len -= 64; + req->resid_len -= 8; + rsp->resid_len -= 64; resp_data[2] = SMP_RESP_FUNC_ACC; memcpy(resp_data + 12, shost->hostt->name, SAS_EXPANDER_VENDOR_ID_LEN); @@ -199,13 +202,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_DISCOVER: - req->data_len -= 16; - if ((int)req->data_len < 0) { - req->data_len = 0; + req->resid_len -= 16; + if ((int)req->resid_len < 0) { + req->resid_len = 0; error = -EINVAL; goto out; } - resp_data_len -= 56; + rsp->resid_len -= 56; sas_host_smp_discover(sas_ha, resp_data, req_data[9]); break; @@ -215,13 +218,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_REPORT_PHY_SATA: - req->data_len -= 16; - if ((int)req->data_len < 0) { - req->data_len = 0; + req->resid_len -= 16; + if ((int)req->resid_len < 0) { + req->resid_len = 0; error = -EINVAL; goto out; } - resp_data_len -= 60; + rsp->resid_len -= 60; sas_report_phy_sata(sas_ha, resp_data, req_data[9]); break; @@ -238,13 +241,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_PHY_CONTROL: - req->data_len -= 44; - if ((int)req->data_len < 0) { - req->data_len = 0; + req->resid_len -= 44; + if ((int)req->resid_len < 0) { + req->resid_len = 0; error = -EINVAL; goto out; } - resp_data_len -= 8; + rsp->resid_len -= 8; sas_phy_control(sas_ha, req_data[9], req_data[10], req_data[32] >> 4, req_data[33] >> 4, resp_data); @@ -265,7 +268,6 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, flush_kernel_dcache_page(bio_page(rsp->bio)); kunmap_atomic(buf - bio_offset(rsp->bio), KM_USER0); local_irq_enable(); - rsp->data_len = resp_data_len; out: kfree(req_data); diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index e03dc0b1e1a..53759c566bf 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1170,9 +1170,7 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, memcpy(req->sense, mpi_reply, sizeof(*mpi_reply)); req->sense_len = sizeof(*mpi_reply); - req->data_len = 0; - rsp->data_len -= mpi_reply->ResponseDataLength; - + rsp->resid_len = rsp->data_len - mpi_reply->ResponseDataLength; } else { dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s - no reply\n", ioc->name, __func__)); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index aa9fc572e45..7d49ef589f3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -240,11 +240,11 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, * is invalid. Prevent the garbage from being misinterpreted * and prevent security leaks by zeroing out the excess data. */ - if (unlikely(req->data_len > 0 && req->data_len <= bufflen)) - memset(buffer + (bufflen - req->data_len), 0, req->data_len); + if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen)) + memset(buffer + (bufflen - req->resid_len), 0, req->resid_len); if (resid) - *resid = req->data_len; + *resid = req->resid_len; ret = req->errors; out: blk_put_request(req); @@ -549,7 +549,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, int leftover = (req->hard_nr_sectors << 9); if (blk_pc_request(req)) - leftover = req->data_len; + leftover = req->resid_len; /* kill remainder if no retrys */ if (error && scsi_noretry_cmd(cmd)) @@ -673,11 +673,11 @@ void scsi_release_buffers(struct scsi_cmnd *cmd) EXPORT_SYMBOL(scsi_release_buffers); /* - * Bidi commands Must be complete as a whole, both sides at once. - * If part of the bytes were written and lld returned - * scsi_in()->resid and/or scsi_out()->resid this information will be left - * in req->data_len and req->next_rq->data_len. The upper-layer driver can - * decide what to do with this information. + * Bidi commands Must be complete as a whole, both sides at once. If + * part of the bytes were written and lld returned scsi_in()->resid + * and/or scsi_out()->resid this information will be left in + * req->resid_len and req->next_rq->resid_len. The upper-layer driver + * can decide what to do with this information. */ static void scsi_end_bidi_request(struct scsi_cmnd *cmd) { @@ -685,8 +685,8 @@ static void scsi_end_bidi_request(struct scsi_cmnd *cmd) unsigned int dlen = req->data_len; unsigned int next_dlen = req->next_rq->data_len; - req->data_len = scsi_out(cmd)->resid; - req->next_rq->data_len = scsi_in(cmd)->resid; + req->resid_len = scsi_out(cmd)->resid; + req->next_rq->resid_len = scsi_in(cmd)->resid; /* The req and req->next_rq have not been completed */ BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen)); @@ -778,7 +778,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_end_bidi_request(cmd); return; } - req->data_len = scsi_get_resid(cmd); + req->resid_len = scsi_get_resid(cmd); } BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */ diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 82312df9b0b..dec4c70677d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1260,7 +1260,7 @@ static void sg_rq_end_io(struct request *rq, int uptodate) sense = rq->sense; result = rq->errors; - resid = rq->data_len; + resid = rq->resid_len; SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", sdp->disk->disk_name, srp->header.pack_id, result)); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index eb24efea8f1..8681b708344 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -463,7 +463,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate) struct scsi_tape *STp = SRpnt->stp; STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; - STp->buffer->cmdstat.residual = req->data_len; + STp->buffer->cmdstat.residual = req->resid_len; if (SRpnt->waiting) complete(SRpnt->waiting); diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b249ae97fb1..06ca92672eb 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -50,10 +50,10 @@ int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) /* FIXME: should be include in osd_sense_info */ if (in_resid) - *in_resid = or->in.req ? or->in.req->data_len : 0; + *in_resid = or->in.req ? or->in.req->resid_len : 0; if (out_resid) - *out_resid = or->out.req ? or->out.req->data_len : 0; + *out_resid = or->out.req ? or->out.req->resid_len : 0; return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a5b1bd6582..6a967cad89f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -229,6 +229,7 @@ struct request { unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; + unsigned int resid_len; /* residual count */ void *sense; unsigned long deadline; -- cgit v1.2.3-70-g09d2 From 5b93629b4509c03ffa87a9316412fedf6f58cb37 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:38 +0900 Subject: block: implement blk_rq_pos/[cur_]sectors() and convert obvious ones Implement accessors - blk_rq_pos(), blk_rq_sectors() and blk_rq_cur_sectors() which return rq->hard_sector, rq->hard_nr_sectors and rq->hard_cur_sectors respectively and convert direct references of the said fields to the accessors. This is in preparation of request data length handling cleanup. Geert : suggested adding const to struct request * parameter to accessors Sergei : spotted error in patch description [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Acked-by: Stephen Rothwell Tested-by: Grant Likely Acked-by: Grant Likely Ackec-by: Sergei Shtylyov Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 2 +- block/cfq-iosched.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/viodasd.c | 6 +++--- drivers/block/xsysace.c | 10 +++++----- drivers/ide/ide-cd.c | 8 ++++---- drivers/ide/ide-io.c | 4 ++-- drivers/message/i2o/i2o_block.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 23 ++++++++++++++++++++--- kernel/trace/blktrace.c | 4 ++-- 12 files changed, 42 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c8d087655ef..c167de5b9ef 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -163,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!rq->hard_nr_sectors) { + if (!blk_rq_sectors(rq)) { q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); /* diff --git a/block/blk-core.c b/block/blk-core.c index 394c5bd8127..895e55b74a4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1683,7 +1683,7 @@ static void blk_account_io_done(struct request *req) unsigned int blk_rq_bytes(struct request *rq) { if (blk_fs_request(rq)) - return rq->hard_nr_sectors << 9; + return blk_rq_sectors(rq) << 9; return rq->data_len; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index def0c698a4b..575083a9ffe 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -760,7 +760,7 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", cfqd->rq_in_driver); - cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; + cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); } static void cfq_deactivate_request(struct request_queue *q, struct request *rq) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index f6586e4d351..c2388673684 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -136,7 +136,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n", __func__, __LINE__, op, n, req->nr_sectors, - req->hard_nr_sectors); + blk_rq_sectors(req)); #endif start_sector = req->sector * priv->blocking_factor; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index ecccf65dce2..e821eed7132 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -368,12 +368,12 @@ static void do_viodasd_request(struct request_queue *q) blkdev_dequeue_request(req); /* check that request contains a valid command */ if (!blk_fs_request(req)) { - viodasd_end_request(req, -EIO, req->hard_nr_sectors); + viodasd_end_request(req, -EIO, blk_rq_sectors(req)); continue; } /* Try sending the request */ if (send_request(req) != 0) - viodasd_end_request(req, -EIO, req->hard_nr_sectors); + viodasd_end_request(req, -EIO, blk_rq_sectors(req)); } } @@ -590,7 +590,7 @@ static int viodasd_handle_read_write(struct vioblocklpevent *bevent) err = vio_lookup_rc(viodasd_err_table, bevent->sub_result); printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n", event->xRc, bevent->sub_result, err->msg); - num_sect = req->hard_nr_sectors; + num_sect = blk_rq_sectors(req); } qlock = req->q->queue_lock; spin_lock_irqsave(qlock, irq_flags); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index b1e1d7e5ab1..5722931d14c 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -645,8 +645,8 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, - "request: sec=%llx hcnt=%lx, ccnt=%x, dir=%i\n", - (unsigned long long) req->sector, req->hard_nr_sectors, + "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n", + (unsigned long long) req->sector, blk_rq_sectors(req), req->current_nr_sectors, rq_data_dir(req)); ace->req = req; @@ -654,7 +654,7 @@ static void ace_fsm_dostate(struct ace_device *ace) ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR; ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF); - count = req->hard_nr_sectors; + count = blk_rq_sectors(req); if (rq_data_dir(req)) { /* Kick off write request */ dev_dbg(ace->dev, "write data\n"); @@ -719,8 +719,8 @@ static void ace_fsm_dostate(struct ace_device *ace) /* bio finished; is there another one? */ if (__blk_end_request(ace->req, 0, blk_rq_cur_bytes(ace->req))) { - /* dev_dbg(ace->dev, "next block; h=%li c=%i\n", - * ace->req->hard_nr_sectors, + /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", + * blk_rq_sectors(ace->req), * ace->req->current_nr_sectors); */ ace->data_ptr = ace->req->buffer; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 8bbe222c5e4..182320dd6ea 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -730,7 +730,7 @@ out_end: if (blk_pc_request(rq)) nsectors = (rq->data_len + 511) >> 9; else - nsectors = rq->hard_nr_sectors; + nsectors = blk_rq_sectors(rq); if (nsectors == 0) nsectors = 1; @@ -875,7 +875,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return ide_issue_pc(drive, &cmd); out_end: - nsectors = rq->hard_nr_sectors; + nsectors = blk_rq_sectors(rq); if (nsectors == 0) nsectors = 1; @@ -1359,8 +1359,8 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { int hard_sect = queue_hardsect_size(q); - long block = (long)rq->hard_sector / (hard_sect >> 9); - unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9); + long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); + unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); memset(rq->cmd, 0, BLK_MAX_CDB); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a0309ea661a..df23bcbd94b 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -118,7 +118,7 @@ unsigned int ide_rq_bytes(struct request *rq) if (blk_pc_request(rq)) return rq->data_len; else - return rq->hard_cur_sectors << 9; + return blk_rq_cur_sectors(rq) << 9; } EXPORT_SYMBOL_GPL(ide_rq_bytes); @@ -133,7 +133,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) * and complete the whole request right now */ if (blk_noretry_request(rq) && error <= 0) - nr_bytes = rq->hard_nr_sectors << 9; + nr_bytes = blk_rq_sectors(rq) << 9; rc = ide_end_rq(drive, rq, error, nr_bytes); if (rc == 0) diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 221317e6a00..56e60f0d531 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -427,7 +427,7 @@ static void i2o_block_end_request(struct request *req, int error, unsigned long flags; if (blk_end_request(req, error, nr_bytes)) { - int leftover = (req->hard_nr_sectors << KERNEL_SECTOR_SHIFT); + int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT); if (blk_pc_request(req)) leftover = req->data_len; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7d49ef589f3..9ff0ca9988a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -546,7 +546,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, * to queue the remainder of them. */ if (blk_end_request(req, error, bytes)) { - int leftover = (req->hard_nr_sectors << 9); + int leftover = blk_rq_sectors(req) << 9; if (blk_pc_request(req)) leftover = req->resid_len; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a967cad89f..4e5f8559872 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -832,13 +832,30 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); /* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->hard_sector; +} + extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return rq->hard_nr_sectors; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return rq->hard_cur_sectors; +} + /* * Request completion related functions. * diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 921ef5d1f0b..42f1c11e754 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -646,7 +646,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, rw, what, rq->errors, 0, NULL); } } @@ -857,7 +857,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, rq->errors, len, data); else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, 0, BLK_TA_DRV_DATA, rq->errors, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3-70-g09d2 From 83096ebf1263b2c1ee5e653ba37d993d02e3eb7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:39 +0900 Subject: block: convert to pos and nr_sectors accessors With recent cleanups, there is no place where low level driver directly manipulates request fields. This means that the 'hard' request fields always equal the !hard fields. Convert all rq->sectors, nr_sectors and current_nr_sectors references to accessors. While at it, drop superflous blk_rq_pos() < 0 test in swim.c. [ Impact: use pos and nr_sectors accessors ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Tested-by: Grant Likely Acked-by: Grant Likely Tested-by: Adrian McMenamin Acked-by: Adrian McMenamin Acked-by: Mike Miller Cc: James Bottomley Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Eric Moore Cc: Alan Stern Cc: FUJITA Tomonori Cc: Pete Zaitcev Cc: Stephen Rothwell Cc: Paul Clements Cc: Tim Waugh Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: David Woodhouse Cc: Martin Schwidefsky Cc: Dario Ballabio Cc: David S. Miller Cc: Rusty Russell Cc: unsik Kim Cc: Laurent Vivier Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 2 +- block/as-iosched.c | 18 +++++++------ block/blk-barrier.c | 2 +- block/blk-core.c | 17 ++++++------ block/blk-merge.c | 10 +++---- block/cfq-iosched.c | 18 ++++++------- block/deadline-iosched.c | 2 +- block/elevator.c | 22 +++++++-------- drivers/block/DAC960.c | 6 ++--- drivers/block/amiflop.c | 6 ++--- drivers/block/ataflop.c | 10 +++---- drivers/block/cciss.c | 22 +++++++-------- drivers/block/cpqarray.c | 9 ++++--- drivers/block/floppy.c | 53 +++++++++++++++++++------------------ drivers/block/hd.c | 14 +++++----- drivers/block/mg_disk.c | 25 ++++++++--------- drivers/block/nbd.c | 12 ++++----- drivers/block/paride/pcd.c | 4 +-- drivers/block/paride/pd.c | 8 +++--- drivers/block/paride/pf.c | 8 +++--- drivers/block/ps3disk.c | 9 +++---- drivers/block/sunvdc.c | 2 +- drivers/block/swim.c | 6 ++--- drivers/block/swim3.c | 34 +++++++++++++----------- drivers/block/sx8.c | 6 ++--- drivers/block/ub.c | 6 ++--- drivers/block/viodasd.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xd.c | 4 +-- drivers/block/xen-blkfront.c | 11 ++++---- drivers/block/xsysace.c | 17 ++++++------ drivers/block/z2ram.c | 6 ++--- drivers/cdrom/gdrom.c | 6 ++--- drivers/cdrom/viocd.c | 2 +- drivers/memstick/core/mspro_block.c | 6 ++--- drivers/message/i2o/i2o_block.c | 20 +++++++------- drivers/mmc/card/block.c | 10 +++---- drivers/mtd/mtd_blkdevs.c | 7 ++--- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dasd_diag.c | 5 ++-- drivers/s390/block/dasd_eckd.c | 6 ++--- drivers/s390/block/dasd_fba.c | 7 ++--- drivers/s390/char/tape_34xx.c | 2 +- drivers/s390/char/tape_3590.c | 2 +- drivers/s390/char/tape_block.c | 2 +- drivers/sbus/char/jsflash.c | 4 +-- drivers/scsi/eata.c | 24 ++++++++--------- drivers/scsi/lpfc/lpfc_scsi.c | 22 +++++++-------- drivers/scsi/scsi_lib.c | 6 ++--- drivers/scsi/sd.c | 24 ++++++++--------- drivers/scsi/sd_dif.c | 2 +- drivers/scsi/sr.c | 15 ++++++----- drivers/scsi/u14-34f.c | 22 ++++++++------- include/scsi/scsi_cmnd.h | 2 +- 54 files changed, 292 insertions(+), 279 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 433012764a3..402ba8f70fc 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1239,7 +1239,7 @@ static void do_ubd_request(struct request_queue *q) } req = dev->request; - sector = req->sector; + sector = blk_rq_pos(req); while(dev->start_sg < dev->end_sg){ struct scatterlist *sg = &dev->sg[dev->start_sg]; diff --git a/block/as-iosched.c b/block/as-iosched.c index 45bd07059c2..7a12cf6ee1d 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -306,8 +306,8 @@ as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2) data_dir = rq_is_sync(rq1); last = ad->last_sector[data_dir]; - s1 = rq1->sector; - s2 = rq2->sector; + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); BUG_ON(data_dir != rq_is_sync(rq2)); @@ -566,13 +566,15 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, as_update_thinktime(ad, aic, thinktime); /* Calculate read -> read seek distance */ - if (aic->last_request_pos < rq->sector) - seek_dist = rq->sector - aic->last_request_pos; + if (aic->last_request_pos < blk_rq_pos(rq)) + seek_dist = blk_rq_pos(rq) - + aic->last_request_pos; else - seek_dist = aic->last_request_pos - rq->sector; + seek_dist = aic->last_request_pos - + blk_rq_pos(rq); as_update_seekdist(ad, aic, seek_dist); } - aic->last_request_pos = rq->sector + rq->nr_sectors; + aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); set_bit(AS_TASK_IOSTARTED, &aic->state); spin_unlock(&aic->lock); } @@ -587,7 +589,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, { unsigned long delay; /* jiffies */ sector_t last = ad->last_sector[ad->batch_data_dir]; - sector_t next = rq->sector; + sector_t next = blk_rq_pos(rq); sector_t delta; /* acceptable close offset (in sectors) */ sector_t s; @@ -981,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) * This has to be set in order to be correctly updated by * as_find_next_rq */ - ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; + ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq); if (data_dir == BLK_RW_SYNC) { struct io_context *ioc = RQ_IOC(rq); diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c167de5b9ef..8713c2fbc4f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -324,7 +324,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) /* * The driver must store the error location in ->bi_sector, if * it supports it. For non-stacked drivers, this should be copied - * from rq->sector. + * from blk_rq_pos(rq). */ if (error_sector) *error_sector = bio->bi_sector; diff --git a/block/blk-core.c b/block/blk-core.c index 895e55b74a4..82dc20621c0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -72,7 +72,7 @@ static void drive_stat_acct(struct request *rq, int new_io) return; cpu = part_stat_lock(); - part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); if (!new_io) part_stat_inc(cpu, part, merges[rw]); @@ -185,10 +185,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg) rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, rq->cmd_flags); - printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", - (unsigned long long)rq->sector, - rq->nr_sectors, - rq->current_nr_sectors); + printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", + (unsigned long long)blk_rq_pos(rq), + blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data_len); @@ -1557,7 +1556,7 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->nr_sectors > q->max_sectors || + if (blk_rq_sectors(rq) > q->max_sectors || rq->data_len > q->max_hw_sectors << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; @@ -1645,7 +1644,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_stat_add(cpu, part, sectors[rw], bytes >> 9); part_stat_unlock(); } @@ -1665,7 +1664,7 @@ static void blk_account_io_done(struct request *req) int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); @@ -1846,7 +1845,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? req->rq_disk->disk_name : "?", - (unsigned long long)req->sector); + (unsigned long long)blk_rq_pos(req)); } blk_account_io_completion(req, nr_bytes); diff --git a/block/blk-merge.c b/block/blk-merge.c index 23d2a6fe34a..bf62a87a9da 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -259,7 +259,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, else max_sectors = q->max_sectors; - if (req->nr_sectors + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -284,7 +284,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, max_sectors = q->max_sectors; - if (req->nr_sectors + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -315,7 +315,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, /* * Will it become too large? */ - if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) + if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -345,7 +345,7 @@ static void blk_account_io_merge(struct request *req) int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_round_stats(cpu, part); part_dec_in_flight(part); @@ -366,7 +366,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, /* * not contiguous */ - if (req->sector + req->nr_sectors != next->sector) + if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) return 0; if (rq_data_dir(req) != rq_data_dir(next) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 575083a9ffe..db4d990a66b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -349,8 +349,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) return rq2; - s1 = rq1->sector; - s2 = rq2->sector; + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); last = cfqd->last_position; @@ -949,10 +949,10 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, struct request *rq) { - if (rq->sector >= cfqd->last_position) - return rq->sector - cfqd->last_position; + if (blk_rq_pos(rq) >= cfqd->last_position) + return blk_rq_pos(rq) - cfqd->last_position; else - return cfqd->last_position - rq->sector; + return cfqd->last_position - blk_rq_pos(rq); } #define CIC_SEEK_THR 8 * 1024 @@ -1918,10 +1918,10 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, if (!cic->last_request_pos) sdist = 0; - else if (cic->last_request_pos < rq->sector) - sdist = rq->sector - cic->last_request_pos; + else if (cic->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - cic->last_request_pos; else - sdist = cic->last_request_pos - rq->sector; + sdist = cic->last_request_pos - blk_rq_pos(rq); /* * Don't allow the seek distance to get too large from the @@ -2071,7 +2071,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_update_io_seektime(cfqd, cic, rq); cfq_update_idle_window(cfqd, cfqq, cic); - cic->last_request_pos = rq->sector + rq->nr_sectors; + cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); if (cfqq == cfqd->active_queue) { /* diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c4d991d4ade..b547cbca7b2 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -138,7 +138,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); if (__rq) { - BUG_ON(sector != __rq->sector); + BUG_ON(sector != blk_rq_pos(__rq)); if (elv_rq_merge_ok(__rq, bio)) { ret = ELEVATOR_FRONT_MERGE; diff --git a/block/elevator.c b/block/elevator.c index 1af5d9f04af..918920056e4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -52,7 +52,7 @@ static const int elv_hash_shift = 6; #define ELV_HASH_FN(sec) \ (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) #define ELV_HASH_ENTRIES (1 << elv_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) DEFINE_TRACE(block_rq_insert); DEFINE_TRACE(block_rq_issue); @@ -119,9 +119,9 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio) * we can merge and sequence is ok, check if it's possible */ if (elv_rq_merge_ok(__rq, bio)) { - if (__rq->sector + __rq->nr_sectors == bio->bi_sector) + if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector) ret = ELEVATOR_BACK_MERGE; - else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) + else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector) ret = ELEVATOR_FRONT_MERGE; } @@ -370,9 +370,9 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq) parent = *p; __rq = rb_entry(parent, struct request, rb_node); - if (rq->sector < __rq->sector) + if (blk_rq_pos(rq) < blk_rq_pos(__rq)) p = &(*p)->rb_left; - else if (rq->sector > __rq->sector) + else if (blk_rq_pos(rq) > blk_rq_pos(__rq)) p = &(*p)->rb_right; else return __rq; @@ -400,9 +400,9 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector) while (n) { rq = rb_entry(n, struct request, rb_node); - if (sector < rq->sector) + if (sector < blk_rq_pos(rq)) n = n->rb_left; - else if (sector > rq->sector) + else if (sector > blk_rq_pos(rq)) n = n->rb_right; else return rq; @@ -441,14 +441,14 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) break; if (pos->cmd_flags & stop_flags) break; - if (rq->sector >= boundary) { - if (pos->sector < boundary) + if (blk_rq_pos(rq) >= boundary) { + if (blk_rq_pos(pos) < boundary) continue; } else { - if (pos->sector >= boundary) + if (blk_rq_pos(pos) >= boundary) break; } - if (rq->sector >= pos->sector) + if (blk_rq_pos(rq) >= blk_rq_pos(pos)) break; } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index f22ed6cc69f..774ab05973a 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3338,8 +3338,8 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_ } Command->Completion = Request->end_io_data; Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; - Command->BlockNumber = Request->sector; - Command->BlockCount = Request->nr_sectors; + Command->BlockNumber = blk_rq_pos(Request); + Command->BlockCount = blk_rq_sectors(Request); Command->Request = Request; blkdev_dequeue_request(Request); Command->SegmentCount = blk_rq_map_sg(req_q, @@ -3431,7 +3431,7 @@ static void DAC960_queue_partial_rw(DAC960_Command_T *Command) * successfully as possible. */ Command->SegmentCount = 1; - Command->BlockNumber = Request->sector; + Command->BlockNumber = blk_rq_pos(Request); Command->BlockCount = 1; DAC960_QueueReadWriteCommand(Command); return; diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 8ff95f2c0ed..e4a14b94828 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1351,13 +1351,13 @@ static void redo_fd_request(void) drive = floppy - unit; /* Here someone could investigate to be more efficient */ - for (cnt = 0; cnt < CURRENT->current_nr_sectors; cnt++) { + for (cnt = 0; cnt < blk_rq_cur_sectors(CURRENT); cnt++) { #ifdef DEBUG printk("fd: sector %ld + %d requested for %s\n", - CURRENT->sector,cnt, + blk_rq_pos(CURRENT), cnt, (rq_data_dir(CURRENT) == READ) ? "read" : "write"); #endif - block = CURRENT->sector + cnt; + block = blk_rq_pos(CURRENT) + cnt; if ((int)block > floppy->blocks) { __blk_end_request_cur(CURRENT, -EIO); goto repeat; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 25067287211..234024cda5e 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -725,7 +725,7 @@ static void do_fd_action( int drive ) if (IS_BUFFERED( drive, ReqSide, ReqTrack )) { if (ReqCmd == READ) { copy_buffer( SECTOR_BUFFER(ReqSector), ReqData ); - if (++ReqCnt < CURRENT->current_nr_sectors) { + if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) { /* read next sector */ setup_req_params( drive ); goto repeat; @@ -1130,7 +1130,7 @@ static void fd_rwsec_done1(int status) } } - if (++ReqCnt < CURRENT->current_nr_sectors) { + if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) { /* read next sector */ setup_req_params( SelectedDrive ); do_fd_action( SelectedDrive ); @@ -1394,7 +1394,7 @@ static void redo_fd_request(void) DPRINT(("redo_fd_request: CURRENT=%p dev=%s CURRENT->sector=%ld\n", CURRENT, CURRENT ? CURRENT->rq_disk->disk_name : "", - CURRENT ? CURRENT->sector : 0 )); + CURRENT ? blk_rq_pos(CURRENT) : 0 )); IsFormatting = 0; @@ -1440,7 +1440,7 @@ repeat: UD.autoprobe = 0; } - if (CURRENT->sector + 1 > UDT->blocks) { + if (blk_rq_pos(CURRENT) + 1 > UDT->blocks) { __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1450,7 +1450,7 @@ repeat: ReqCnt = 0; ReqCmd = rq_data_dir(CURRENT); - ReqBlock = CURRENT->sector; + ReqBlock = blk_rq_pos(CURRENT); ReqBuffer = CURRENT->buffer; setup_req_params( drive ); do_fd_action( drive ); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index f22d4932433..ab7b04c0db7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2835,10 +2835,10 @@ static void do_cciss_request(struct request_queue *q) c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write; - start_blk = creq->sector; + start_blk = blk_rq_pos(creq); #ifdef CCISS_DEBUG - printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector, - (int)creq->nr_sectors); + printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", + (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq)); #endif /* CCISS_DEBUG */ sg_init_table(tmp_sg, MAXSGENTRIES); @@ -2864,8 +2864,8 @@ static void do_cciss_request(struct request_queue *q) h->maxSG = seg; #ifdef CCISS_DEBUG - printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n", - creq->nr_sectors, seg); + printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n", + blk_rq_sectors(creq), seg); #endif /* CCISS_DEBUG */ c->Header.SGList = c->Header.SGTotal = seg; @@ -2877,8 +2877,8 @@ static void do_cciss_request(struct request_queue *q) c->Request.CDB[4] = (start_blk >> 8) & 0xff; c->Request.CDB[5] = start_blk & 0xff; c->Request.CDB[6] = 0; // (sect >> 24) & 0xff; MSB - c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff; - c->Request.CDB[8] = creq->nr_sectors & 0xff; + c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff; + c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; } else { u32 upper32 = upper_32_bits(start_blk); @@ -2893,10 +2893,10 @@ static void do_cciss_request(struct request_queue *q) c->Request.CDB[7]= (start_blk >> 16) & 0xff; c->Request.CDB[8]= (start_blk >> 8) & 0xff; c->Request.CDB[9]= start_blk & 0xff; - c->Request.CDB[10]= (creq->nr_sectors >> 24) & 0xff; - c->Request.CDB[11]= (creq->nr_sectors >> 16) & 0xff; - c->Request.CDB[12]= (creq->nr_sectors >> 8) & 0xff; - c->Request.CDB[13]= creq->nr_sectors & 0xff; + c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff; + c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff; + c->Request.CDB[12]= (blk_rq_sectors(creq) >> 8) & 0xff; + c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff; c->Request.CDB[14] = c->Request.CDB[15] = 0; } } else if (blk_pc_request(creq)) { diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 488a8f4a60a..a5caeff4718 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -919,10 +919,11 @@ queue_next: c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = creq->sector; + c->req.hdr.blk = blk_rq_pos(creq); c->rq = creq; DBGPX( - printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); + printk("sector=%d, nr_sectors=%u\n", + blk_rq_pos(creq), blk_rq_sectors(creq)); ); sg_init_table(tmp_sg, SG_MAX); seg = blk_rq_map_sg(q, creq, tmp_sg); @@ -940,9 +941,9 @@ DBGPX( tmp_sg[i].offset, tmp_sg[i].length, dir); } -DBGPX( printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); ); +DBGPX( printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); ); c->req.hdr.sg_cnt = seg; - c->req.hdr.blk_cnt = creq->nr_sectors; + c->req.hdr.blk_cnt = blk_rq_sectors(creq); c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE; c->type = CMD_RWREQ; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 1300df6f164..45248628338 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2303,7 +2303,7 @@ static void floppy_end_request(struct request *req, int error) /* current_count_sectors can be zero if transfer failed */ if (error) - nr_sectors = req->current_nr_sectors; + nr_sectors = blk_rq_cur_sectors(req); if (__blk_end_request(req, error, nr_sectors << 9)) return; @@ -2332,7 +2332,7 @@ static void request_done(int uptodate) if (uptodate) { /* maintain values for invalidation on geometry * change */ - block = current_count_sectors + req->sector; + block = current_count_sectors + blk_rq_pos(req); INFBOUND(DRS->maxblock, block); if (block > _floppy->sect) DRS->maxtrack = 1; @@ -2346,10 +2346,10 @@ static void request_done(int uptodate) /* record write error information */ DRWE->write_errors++; if (DRWE->write_errors == 1) { - DRWE->first_error_sector = req->sector; + DRWE->first_error_sector = blk_rq_pos(req); DRWE->first_error_generation = DRS->generation; } - DRWE->last_error_sector = req->sector; + DRWE->last_error_sector = blk_rq_pos(req); DRWE->last_error_generation = DRS->generation; } spin_lock_irqsave(q->queue_lock, flags); @@ -2503,24 +2503,24 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) max_sector = transfer_size(ssize, min(max_sector, max_sector_2), - current_req->nr_sectors); + blk_rq_sectors(current_req)); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > fsector_t + current_req->nr_sectors) + buffer_max > fsector_t + blk_rq_sectors(current_req)) current_count_sectors = min_t(int, buffer_max - fsector_t, - current_req->nr_sectors); + blk_rq_sectors(current_req)); remaining = current_count_sectors << 9; #ifdef FLOPPY_SANITY_CHECK - if ((remaining >> 9) > current_req->nr_sectors && + if ((remaining >> 9) > blk_rq_sectors(current_req) && CT(COMMAND) == FD_WRITE) { DPRINT("in copy buffer\n"); printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); - printk("current_req->nr_sectors=%ld\n", - current_req->nr_sectors); + printk("current_req->nr_sectors=%u\n", + blk_rq_sectors(current_req)); printk("current_req->current_nr_sectors=%u\n", - current_req->current_nr_sectors); + blk_rq_cur_sectors(current_req)); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); } @@ -2530,7 +2530,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - size = current_req->current_nr_sectors << 9; + size = blk_rq_cur_sectors(current_req) << 9; rq_for_each_segment(bv, current_req, iter) { if (!remaining) @@ -2648,10 +2648,10 @@ static int make_raw_rw_request(void) max_sector = _floppy->sect * _floppy->head; - TRACK = (int)current_req->sector / max_sector; - fsector_t = (int)current_req->sector % max_sector; + TRACK = (int)blk_rq_pos(current_req) / max_sector; + fsector_t = (int)blk_rq_pos(current_req) % max_sector; if (_floppy->track && TRACK >= _floppy->track) { - if (current_req->current_nr_sectors & 1) { + if (blk_rq_cur_sectors(current_req) & 1) { current_count_sectors = 1; return 1; } else @@ -2669,7 +2669,7 @@ static int make_raw_rw_request(void) if (fsector_t >= max_sector) { current_count_sectors = min_t(int, _floppy->sect - fsector_t, - current_req->nr_sectors); + blk_rq_sectors(current_req)); return 1; } SIZECODE = 2; @@ -2720,7 +2720,7 @@ static int make_raw_rw_request(void) in_sector_offset = (fsector_t % _floppy->sect) % ssize; aligned_sector_t = fsector_t - in_sector_offset; - max_size = current_req->nr_sectors; + max_size = blk_rq_sectors(current_req); if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { @@ -2729,10 +2729,10 @@ static int make_raw_rw_request(void) copy_buffer(1, max_sector, buffer_max); return 1; } - } else if (in_sector_offset || current_req->nr_sectors < ssize) { + } else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) { if (CT(COMMAND) == FD_WRITE) { - if (fsector_t + current_req->nr_sectors > ssize && - fsector_t + current_req->nr_sectors < ssize + ssize) + if (fsector_t + blk_rq_sectors(current_req) > ssize && + fsector_t + blk_rq_sectors(current_req) < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2776,7 +2776,7 @@ static int make_raw_rw_request(void) (indirect * 2 > direct * 3 && *errors < DP->max_errors.read_track && ((!probing || (DP->read_track & (1 << DRS->probed_format)))))) { - max_size = current_req->nr_sectors; + max_size = blk_rq_sectors(current_req); } else { raw_cmd->kernel_data = current_req->buffer; raw_cmd->length = current_count_sectors << 9; @@ -2801,7 +2801,7 @@ static int make_raw_rw_request(void) fsector_t > buffer_max || fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || - (!in_sector_offset && current_req->nr_sectors >= ssize)) && + (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) && max_sector > 2 * max_buffer_sectors + buffer_min && max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */ @@ -2879,8 +2879,8 @@ static int make_raw_rw_request(void) printk("write\n"); return 0; } - } else if (raw_cmd->length > current_req->nr_sectors << 9 || - current_count_sectors > current_req->nr_sectors) { + } else if (raw_cmd->length > blk_rq_sectors(current_req) << 9 || + current_count_sectors > blk_rq_sectors(current_req)) { DPRINT("buffer overrun in direct transfer\n"); return 0; } else if (raw_cmd->length < current_count_sectors << 9) { @@ -2990,8 +2990,9 @@ static void do_fd_request(struct request_queue * q) if (usage_count == 0) { printk("warning: usage count=0, current_req=%p exiting\n", current_req); - printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector, - current_req->cmd_type, current_req->cmd_flags); + printk("sect=%ld type=%x flags=%x\n", + (long)blk_rq_pos(current_req), current_req->cmd_type, + current_req->cmd_flags); return; } if (test_bit(0, &fdc_busy)) { diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 75b9ca95c4e..a3b39940ce0 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -228,7 +228,7 @@ static void dump_status(const char *msg, unsigned int stat) printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); if (CURRENT) - printk(", sector=%ld", CURRENT->sector); + printk(", sector=%ld", blk_rq_pos(CURRENT)); } printk("\n"); } @@ -457,9 +457,9 @@ ok_to_read: req = CURRENT; insw(HD_DATA, req->buffer, 256); #ifdef DEBUG - printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1, - req->buffer+512); + printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", + req->rq_disk->disk_name, blk_rq_pos(req) + 1, + blk_rq_sectors(req) - 1, req->buffer+512); #endif if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&read_intr); @@ -485,7 +485,7 @@ static void write_intr(void) continue; if (!OK_STATUS(i)) break; - if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) + if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT)) goto ok_to_write; } while (--retries > 0); dump_status("write_intr", i); @@ -589,8 +589,8 @@ repeat: return; } disk = req->rq_disk->private_data; - block = req->sector; - nsect = req->nr_sectors; + block = blk_rq_pos(req); + nsect = blk_rq_sectors(req); if (block >= get_capacity(req->rq_disk) || ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 71e56cc28ca..826c3492b9f 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -220,7 +220,8 @@ static void mg_dump_status(const char *msg, unsigned int stat, if (host->breq) { req = elv_next_request(host->breq); if (req) - printk(", sector=%u", (u32)req->sector); + printk(", sector=%u", + (unsigned int)blk_rq_pos(req)); } } @@ -493,12 +494,12 @@ static void mg_read(struct request *req) u32 j; struct mg_host *host = req->rq_disk->private_data; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) != - MG_ERR_NONE) + if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), + MG_CMD_RD, NULL) != MG_ERR_NONE) mg_bad_rw_intr(host); MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - req->nr_sectors, req->sector, req->buffer); + blk_rq_sectors(req), blk_rq_pos(req), req->buffer); do { u16 *buff = (u16 *)req->buffer; @@ -522,14 +523,14 @@ static void mg_write(struct request *req) u32 j; struct mg_host *host = req->rq_disk->private_data; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) != - MG_ERR_NONE) { + if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), + MG_CMD_WR, NULL) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - req->nr_sectors, req->sector, req->buffer); + blk_rq_sectors(req), blk_rq_pos(req), req->buffer); do { u16 *buff = (u16 *)req->buffer; @@ -579,7 +580,7 @@ ok_to_read: (i << 1)); MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - req->sector, req->nr_sectors - 1, req->buffer); + blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); /* send read confirm */ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); @@ -609,7 +610,7 @@ static void mg_write_intr(struct mg_host *host) break; if (!MG_READY_OK(i)) break; - if ((req->nr_sectors <= 1) || (i & ATA_DRQ)) + if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ)) goto ok_to_write; } while (0); mg_dump_status("mg_write_intr", i, host); @@ -627,7 +628,7 @@ ok_to_write: buff++; } MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - req->sector, req->nr_sectors, req->buffer); + blk_rq_pos(req), blk_rq_sectors(req), req->buffer); host->mg_do_intr = mg_write_intr; mod_timer(&host->timer, jiffies + 3 * HZ); } @@ -749,9 +750,9 @@ static void mg_request(struct request_queue *q) del_timer(&host->timer); - sect_num = req->sector; + sect_num = blk_rq_pos(req); /* deal whole segments */ - sect_cnt = req->nr_sectors; + sect_cnt = blk_rq_sectors(req); /* sanity check */ if (sect_num >= get_capacity(req->rq_disk) || diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a9ab8be9d92..977a5737793 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -110,7 +110,7 @@ static void nbd_end_request(struct request *req) req, error ? "failed" : "done"); spin_lock_irqsave(q->queue_lock, flags); - __blk_end_request(req, error, req->nr_sectors << 9); + __blk_end_request(req, error, blk_rq_sectors(req) << 9); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -231,19 +231,19 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) { int result, flags; struct nbd_request request; - unsigned long size = req->nr_sectors << 9; + unsigned long size = blk_rq_sectors(req) << 9; request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64) req->sector << 9); + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); memcpy(request.handle, &req, sizeof(req)); - dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", + dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", lo->disk->disk_name, req, nbdcmd_to_ascii(nbd_cmd(req)), - (unsigned long long)req->sector << 9, - req->nr_sectors << 9); + (unsigned long long)blk_rq_pos(req) << 9, + blk_rq_sectors(req) << 9); result = sock_xmit(lo, 1, &request, sizeof(request), (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); if (result <= 0) { diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 9fd57c2aa46..2d5dc0af55e 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -728,8 +728,8 @@ static void do_pcd_request(struct request_queue * q) if (cd != pcd_current) pcd_bufblk = -1; pcd_current = cd; - pcd_sector = pcd_req->sector; - pcd_count = pcd_req->current_nr_sectors; + pcd_sector = blk_rq_pos(pcd_req); + pcd_count = blk_rq_cur_sectors(pcd_req); pcd_buf = pcd_req->buffer; pcd_busy = 1; ps_set_intr(do_pcd_read, NULL, 0, nice); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 0732df4e901..9ec5d4ac0b6 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -444,11 +444,11 @@ static enum action do_pd_io_start(void) pd_cmd = rq_data_dir(pd_req); if (pd_cmd == READ || pd_cmd == WRITE) { - pd_block = pd_req->sector; - pd_count = pd_req->current_nr_sectors; + pd_block = blk_rq_pos(pd_req); + pd_count = blk_rq_cur_sectors(pd_req); if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) return Fail; - pd_run = pd_req->nr_sectors; + pd_run = blk_rq_sectors(pd_req); pd_buf = pd_req->buffer; pd_retries = 0; if (pd_cmd == READ) @@ -479,7 +479,7 @@ static int pd_next_buf(void) return 0; spin_lock_irqsave(&pd_lock, saved_flags); __blk_end_request_cur(pd_req, 0); - pd_count = pd_req->current_nr_sectors; + pd_count = blk_rq_cur_sectors(pd_req); pd_buf = pd_req->buffer; spin_unlock_irqrestore(&pd_lock, saved_flags); return 0; diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 3871e3586d6..e88c889aa7f 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -768,9 +768,9 @@ repeat: return; pf_current = pf_req->rq_disk->private_data; - pf_block = pf_req->sector; - pf_run = pf_req->nr_sectors; - pf_count = pf_req->current_nr_sectors; + pf_block = blk_rq_pos(pf_req); + pf_run = blk_rq_sectors(pf_req); + pf_count = blk_rq_cur_sectors(pf_req); if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { pf_end_request(-EIO); @@ -810,7 +810,7 @@ static int pf_next_buf(void) spin_unlock_irqrestore(&pf_spin_lock, saved_flags); if (!pf_req) return 1; - pf_count = pf_req->current_nr_sectors; + pf_count = blk_rq_cur_sectors(pf_req); pf_buf = pf_req->buffer; } return 0; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index c2388673684..8d583081b50 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -134,13 +134,12 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, rq_for_each_segment(bv, req, iter) n++; dev_dbg(&dev->sbd.core, - "%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n", - __func__, __LINE__, op, n, req->nr_sectors, - blk_rq_sectors(req)); + "%s:%u: %s req has %u bvecs for %u sectors\n", + __func__, __LINE__, op, n, blk_rq_sectors(req)); #endif - start_sector = req->sector * priv->blocking_factor; - sectors = req->nr_sectors * priv->blocking_factor; + start_sector = blk_rq_pos(req) * priv->blocking_factor; + sectors = blk_rq_sectors(req) * priv->blocking_factor; dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", __func__, __LINE__, op, sectors, start_sector); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index f59887c5ffb..9f351bfa15e 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -416,7 +416,7 @@ static int __send_request(struct request *req) desc->slice = 0; } desc->status = ~0; - desc->offset = (req->sector << 9) / port->vdisk_block_size; + desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size; desc->size = len; desc->ncookies = err; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 97ef4266c4c..fc6a1c32293 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -531,7 +531,7 @@ static void redo_fd_request(struct request_queue *q) while ((req = elv_next_request(q))) { fs = req->rq_disk->private_data; - if (req->sector < 0 || req->sector >= fs->total_secs) { + if (blk_rq_pos(req) >= fs->total_secs) { __blk_end_request_cur(req, -EIO); continue; } @@ -551,8 +551,8 @@ static void redo_fd_request(struct request_queue *q) __blk_end_request_cur(req, -EIO); break; case READ: - if (floppy_read_sectors(fs, req->sector, - req->current_nr_sectors, + if (floppy_read_sectors(fs, blk_rq_pos(req), + blk_rq_cur_sectors(req), req->buffer)) { __blk_end_request_cur(req, -EIO); continue; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 424855945b9..c1b9a4dc11b 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -312,14 +312,14 @@ static void start_request(struct floppy_state *fs) } while (fs->state == idle && (req = elv_next_request(swim3_queue))) { #if 0 - printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", + printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", req->rq_disk->disk_name, req->cmd, - (long)req->sector, req->nr_sectors, req->buffer); - printk(" errors=%d current_nr_sectors=%ld\n", - req->errors, req->current_nr_sectors); + (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer); + printk(" errors=%d current_nr_sectors=%u\n", + req->errors, blk_rq_cur_sectors(req)); #endif - if (req->sector >= fs->total_secs) { + if (blk_rq_pos(req) >= fs->total_secs) { __blk_end_request_cur(req, -EIO); continue; } @@ -337,13 +337,14 @@ static void start_request(struct floppy_state *fs) } } - /* Do not remove the cast. req->sector is now a sector_t and - * can be 64 bits, but it will never go past 32 bits for this - * driver anyway, so we can safely cast it down and not have - * to do a 64/32 division + /* Do not remove the cast. blk_rq_pos(req) is now a + * sector_t and can be 64 bits, but it will never go + * past 32 bits for this driver anyway, so we can + * safely cast it down and not have to do a 64/32 + * division */ - fs->req_cyl = ((long)req->sector) / fs->secpercyl; - x = ((long)req->sector) % fs->secpercyl; + fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl; + x = ((long)blk_rq_pos(req)) % fs->secpercyl; fs->head = x / fs->secpertrack; fs->req_sector = x % fs->secpertrack + 1; fd_req = req; @@ -420,7 +421,7 @@ static inline void setup_transfer(struct floppy_state *fs) struct dbdma_cmd *cp = fs->dma_cmd; struct dbdma_regs __iomem *dr = fs->dma; - if (fd_req->current_nr_sectors <= 0) { + if (blk_rq_cur_sectors(fd_req) <= 0) { printk(KERN_ERR "swim3: transfer 0 sectors?\n"); return; } @@ -428,8 +429,8 @@ static inline void setup_transfer(struct floppy_state *fs) n = 1; else { n = fs->secpertrack - fs->req_sector + 1; - if (n > fd_req->current_nr_sectors) - n = fd_req->current_nr_sectors; + if (n > blk_rq_cur_sectors(fd_req)) + n = blk_rq_cur_sectors(fd_req); } fs->scount = n; swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0); @@ -600,7 +601,8 @@ static void xfer_timeout(unsigned long data) out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION); out_8(&sw->select, RELAX); printk(KERN_ERR "swim3: timeout %sing sector %ld\n", - (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector); + (rq_data_dir(fd_req)==WRITE? "writ": "read"), + (long)blk_rq_pos(fd_req)); __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); @@ -714,7 +716,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) } else { printk("swim3: error %sing block %ld (err=%x)\n", rq_data_dir(fd_req) == WRITE? "writ": "read", - (long)fd_req->sector, err); + (long)blk_rq_pos(fd_req), err); __blk_end_request_cur(fd_req, -EIO); fs->state = idle; } diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 60e85bb6f79..087c94c8b2d 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -903,10 +903,10 @@ queue_one_request: msg->sg_count = n_elem; msg->sg_type = SGT_32BIT; msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag)); - msg->lba = cpu_to_le32(rq->sector & 0xffffffff); - tmp = (rq->sector >> 16) >> 16; + msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff); + tmp = (blk_rq_pos(rq) >> 16) >> 16; msg->lba_high = cpu_to_le16( (u16) tmp ); - msg->lba_count = cpu_to_le16(rq->nr_sectors); + msg->lba_count = cpu_to_le16(blk_rq_sectors(rq)); msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg); for (i = 0; i < n_elem; i++) { diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 8c2cc71327e..dc3b899ad2b 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -726,8 +726,8 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, * The call to blk_queue_hardsect_size() guarantees that request * is aligned, but it is given in terms of 512 byte units, always. */ - block = rq->sector >> lun->capacity.bshift; - nblks = rq->nr_sectors >> lun->capacity.bshift; + block = blk_rq_pos(rq) >> lun->capacity.bshift; + nblks = blk_rq_sectors(rq) >> lun->capacity.bshift; cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10; /* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */ @@ -739,7 +739,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, cmd->cdb[8] = nblks; cmd->cdb_len = 10; - cmd->len = rq->nr_sectors * 512; + cmd->len = blk_rq_sectors(rq) * 512; } static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index e821eed7132..2086cb12d3e 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -252,7 +252,7 @@ static int send_request(struct request *req) struct viodasd_device *d; unsigned long flags; - start = (u64)req->sector << 9; + start = (u64)blk_rq_pos(req) << 9; if (rq_data_dir(req) == READ) { direction = DMA_FROM_DEVICE; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 50745e64414..1980ab45635 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -85,7 +85,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, vbr->req = req; if (blk_fs_request(vbr->req)) { vbr->out_hdr.type = 0; - vbr->out_hdr.sector = vbr->req->sector; + vbr->out_hdr.sector = blk_rq_pos(vbr->req); vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else if (blk_pc_request(vbr->req)) { vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 14be4c1ed1a..4ef88018bcd 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -306,8 +306,8 @@ static void do_xd_request (struct request_queue * q) return; while ((req = elv_next_request(q)) != NULL) { - unsigned block = req->sector; - unsigned count = req->nr_sectors; + unsigned block = blk_rq_pos(req); + unsigned count = blk_rq_sectors(req); XD_INFO *disk = req->rq_disk->private_data; int res = 0; int retry; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b4564479f64..91fc56597e9 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -231,7 +231,7 @@ static int blkif_queue_request(struct request *req) info->shadow[id].request = (unsigned long)req; ring_req->id = id; - ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); ring_req->handle = info->handle; ring_req->operation = rq_data_dir(req) ? @@ -310,11 +310,10 @@ static void do_blkif_request(struct request_queue *rq) goto wait; pr_debug("do_blk_req %p: cmd %p, sec %lx, " - "(%u/%li) buffer:%p [%s]\n", - req, req->cmd, (unsigned long)req->sector, - req->current_nr_sectors, - req->nr_sectors, req->buffer, - rq_data_dir(req) ? "write" : "read"); + "(%u/%u) buffer:%p [%s]\n", + req, req->cmd, (unsigned long)blk_rq_pos(req), + blk_rq_cur_sectors(req), blk_rq_sectors(req), + req->buffer, rq_data_dir(req) ? "write" : "read"); blkdev_dequeue_request(req); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 5722931d14c..97c99b43f88 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -646,13 +646,14 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n", - (unsigned long long) req->sector, blk_rq_sectors(req), - req->current_nr_sectors, rq_data_dir(req)); + (unsigned long long)blk_rq_pos(req), + blk_rq_sectors(req), blk_rq_cur_sectors(req), + rq_data_dir(req)); ace->req = req; ace->data_ptr = req->buffer; - ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR; - ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF); + ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; + ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); count = blk_rq_sectors(req); if (rq_data_dir(req)) { @@ -688,7 +689,7 @@ static void ace_fsm_dostate(struct ace_device *ace) dev_dbg(ace->dev, "CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n", ace->fsm_task, ace->fsm_iter_num, - ace->req->current_nr_sectors * 16, + blk_rq_cur_sectors(ace->req) * 16, ace->data_count, ace->in_irq); ace_fsm_yield(ace); /* need to poll CFBSY bit */ break; @@ -697,7 +698,7 @@ static void ace_fsm_dostate(struct ace_device *ace) dev_dbg(ace->dev, "DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n", ace->fsm_task, ace->fsm_iter_num, - ace->req->current_nr_sectors * 16, + blk_rq_cur_sectors(ace->req) * 16, ace->data_count, ace->in_irq); ace_fsm_yieldirq(ace); break; @@ -721,10 +722,10 @@ static void ace_fsm_dostate(struct ace_device *ace) blk_rq_cur_bytes(ace->req))) { /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", * blk_rq_sectors(ace->req), - * ace->req->current_nr_sectors); + * blk_rq_cur_sectors(ace->req)); */ ace->data_ptr = ace->req->buffer; - ace->data_count = ace->req->current_nr_sectors * 16; + ace->data_count = blk_rq_cur_sectors(ace->req) * 16; ace_fsm_yieldirq(ace); break; } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index b66ad58a3c3..d4e6b71f514 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -71,12 +71,12 @@ static void do_z2_request(struct request_queue *q) { struct request *req; while ((req = elv_next_request(q)) != NULL) { - unsigned long start = req->sector << 9; - unsigned long len = req->current_nr_sectors << 9; + unsigned long start = blk_rq_pos(req) << 9; + unsigned long len = blk_rq_cur_sectors(req) << 9; if (start + len > z2ram_size) { printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n", - req->sector, req->current_nr_sectors); + blk_rq_pos(req), blk_rq_cur_sectors(req)); __blk_end_request_cur(req, -EIO); continue; } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index cab2b1fb2fe..488423cab51 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -584,8 +584,8 @@ static void gdrom_readdisk_dma(struct work_struct *work) list_for_each_safe(elem, next, &gdrom_deferred) { req = list_entry(elem, struct request, queuelist); spin_unlock(&gdrom_lock); - block = req->sector/GD_TO_BLK + GD_SESSION_OFFSET; - block_cnt = req->nr_sectors/GD_TO_BLK; + block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; + block_cnt = blk_rq_sectors(req)/GD_TO_BLK; ctrl_outl(PHYSADDR(req->buffer), GDROM_DMA_STARTADDR_REG); ctrl_outl(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); ctrl_outl(1, GDROM_DMA_DIRECTION_REG); @@ -661,7 +661,7 @@ static void gdrom_request(struct request_queue *rq) printk(" write request ignored\n"); __blk_end_request_cur(req, -EIO); } - if (req->nr_sectors) + if (blk_rq_sectors(req)) gdrom_request_handler_dma(req); else __blk_end_request_cur(req, -EIO); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index cc3efa096e1..6e190a93d8d 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -282,7 +282,7 @@ static int send_request(struct request *req) viopath_targetinst(viopath_hostLp), (u64)req, VIOVERSION << 16, ((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr, - (u64)req->sector * 512, len, 0); + (u64)blk_rq_pos(req) * 512, len, 0); if (hvrc != HvLpEvent_Rc_Good) { printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc); return -1; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index a41634699f8..9e600d22f40 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -677,10 +677,10 @@ try_again: continue; } - t_sec = msb->block_req->sector << 9; + t_sec = blk_rq_pos(msb->block_req) << 9; sector_div(t_sec, msb->page_size); - count = msb->block_req->nr_sectors << 9; + count = blk_rq_sectors(msb->block_req) << 9; count /= msb->page_size; param.system = msb->system; @@ -745,7 +745,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) t_len *= msb->page_size; } } else - t_len = msb->block_req->nr_sectors << 9; + t_len = blk_rq_sectors(msb->block_req) << 9; dev_dbg(&card->dev, "transferred %x (%d)\n", t_len, error); diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 56e60f0d531..510eb472637 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -761,7 +761,7 @@ static int i2o_block_transfer(struct request *req) break; case CACHE_SMARTFETCH: - if (req->nr_sectors > 16) + if (blk_rq_sectors(req) > 16) ctl_flags = 0x201F0008; else ctl_flags = 0x001F0000; @@ -781,13 +781,13 @@ static int i2o_block_transfer(struct request *req) ctl_flags = 0x001F0010; break; case CACHE_SMARTBACK: - if (req->nr_sectors > 16) + if (blk_rq_sectors(req) > 16) ctl_flags = 0x001F0004; else ctl_flags = 0x001F0010; break; case CACHE_SMARTTHROUGH: - if (req->nr_sectors > 16) + if (blk_rq_sectors(req) > 16) ctl_flags = 0x001F0004; else ctl_flags = 0x001F0010; @@ -827,22 +827,24 @@ static int i2o_block_transfer(struct request *req) *mptr++ = cpu_to_le32(scsi_flags); - *((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec); - *((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec); + *((u32 *) & cmd[2]) = cpu_to_be32(blk_rq_pos(req) * hwsec); + *((u16 *) & cmd[7]) = cpu_to_be16(blk_rq_sectors(req) * hwsec); memcpy(mptr, cmd, 10); mptr += 4; - *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); + *mptr++ = + cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT); } else #endif { msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid); *mptr++ = cpu_to_le32(ctl_flags); - *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); *mptr++ = - cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT)); + cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT); + *mptr++ = + cpu_to_le32((u32) (blk_rq_pos(req) << KERNEL_SECTOR_SHIFT)); *mptr++ = - cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT)); + cpu_to_le32(blk_rq_pos(req) >> (32 - KERNEL_SECTOR_SHIFT)); } if (!i2o_block_sglist_alloc(c, ireq, &mptr)) { diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index fe8041e619e..949e99770ad 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -243,7 +243,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) brq.mrq.cmd = &brq.cmd; brq.mrq.data = &brq.data; - brq.cmd.arg = req->sector; + brq.cmd.arg = blk_rq_pos(req); if (!mmc_card_blockaddr(card)) brq.cmd.arg <<= 9; brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; @@ -251,7 +251,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) brq.stop.opcode = MMC_STOP_TRANSMISSION; brq.stop.arg = 0; brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - brq.data.blocks = req->nr_sectors; + brq.data.blocks = blk_rq_sectors(req); /* * After a read error, we redo the request one sector at a time @@ -293,7 +293,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) * Adjust the sg list so it is the same size as the * request. */ - if (brq.data.blocks != req->nr_sectors) { + if (brq.data.blocks != blk_rq_sectors(req)) { int i, data_size = brq.data.blocks << 9; struct scatterlist *sg; @@ -344,8 +344,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) printk(KERN_ERR "%s: error %d transferring data," " sector %u, nr %u, card status %#x\n", req->rq_disk->disk_name, brq.data.error, - (unsigned)req->sector, - (unsigned)req->nr_sectors, status); + (unsigned)blk_rq_pos(req), + (unsigned)blk_rq_sectors(req), status); } if (brq.stop.error) { diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 76c4c8d1307..4ea2e67ac97 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -47,8 +47,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, unsigned long block, nsect; char *buf; - block = req->sector << 9 >> tr->blkshift; - nsect = req->current_nr_sectors << 9 >> tr->blkshift; + block = blk_rq_pos(req) << 9 >> tr->blkshift; + nsect = blk_rq_cur_sectors(req) << 9 >> tr->blkshift; buf = req->buffer; @@ -59,7 +59,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, if (!blk_fs_request(req)) return -EIO; - if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk)) + if (blk_rq_pos(req) + blk_rq_cur_sectors(req) > + get_capacity(req->rq_disk)) return -EIO; switch(rq_data_dir(req)) { diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fabec95686b..7df03c7aea0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -603,7 +603,7 @@ static void dasd_profile_end(struct dasd_block *block, if (dasd_profile_level != DASD_PROFILE_ON) return; - sectors = req->nr_sectors; + sectors = blk_rq_sectors(req); if (!cqr->buildclk || !cqr->startclk || !cqr->stopclk || !cqr->endclk || !sectors) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index b9a7f773344..2efaddfae56 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -505,8 +505,9 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, return ERR_PTR(-EINVAL); blksize = block->bp_block; /* Calculate record id of first and last block. */ - first_rec = req->sector >> block->s2b_shift; - last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift; + first_rec = blk_rq_pos(req) >> block->s2b_shift; + last_rec = + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; /* Check struct bio and count the number of blocks for the request. */ count = 0; rq_for_each_segment(bv, req, iter) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cb52da033f0..a41c94053e6 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2354,10 +2354,10 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, blksize = block->bp_block; blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize); /* Calculate record id of first and last block. */ - first_rec = first_trk = req->sector >> block->s2b_shift; + first_rec = first_trk = blk_rq_pos(req) >> block->s2b_shift; first_offs = sector_div(first_trk, blk_per_trk); last_rec = last_trk = - (req->sector + req->nr_sectors - 1) >> block->s2b_shift; + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; last_offs = sector_div(last_trk, blk_per_trk); cdlspecial = (private->uses_cdl && first_rec < 2*blk_per_trk); @@ -2420,7 +2420,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) private = (struct dasd_eckd_private *) cqr->block->base->private; blksize = cqr->block->bp_block; blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize); - recid = req->sector >> cqr->block->s2b_shift; + recid = blk_rq_pos(req) >> cqr->block->s2b_shift; ccw = cqr->cpaddr; /* Skip over define extent & locate record. */ ccw++; diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index a3eb6fd1467..8912358daa2 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -270,8 +270,9 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, return ERR_PTR(-EINVAL); blksize = block->bp_block; /* Calculate record id of first and last block. */ - first_rec = req->sector >> block->s2b_shift; - last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift; + first_rec = blk_rq_pos(req) >> block->s2b_shift; + last_rec = + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; /* Check struct bio and count the number of blocks for the request. */ count = 0; cidaw = 0; @@ -309,7 +310,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, ccw = cqr->cpaddr; /* First ccw is define extent. */ define_extent(ccw++, cqr->data, rq_data_dir(req), - block->bp_block, req->sector, req->nr_sectors); + block->bp_block, blk_rq_pos(req), blk_rq_sectors(req)); /* Build locate_record + read/write ccws. */ idaws = (unsigned long *) (cqr->data + sizeof(struct DE_fba_data)); LO_data = (struct LO_fba_data *) (idaws + cidaw); diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 5f8e8ef43dd..2d00a383a47 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1134,7 +1134,7 @@ tape_34xx_bread(struct tape_device *device, struct request *req) /* Setup ccws. */ request->op = TO_BLOCK; start_block = (struct tape_34xx_block_id *) request->cpdata; - start_block->block = req->sector >> TAPEBLOCK_HSEC_S2B; + start_block->block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B; DBF_EVENT(6, "start_block = %i\n", start_block->block); ccw = request->cpaddr; diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 823b05bd0dd..c453b2f3e9f 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -633,7 +633,7 @@ tape_3590_bread(struct tape_device *device, struct request *req) struct req_iterator iter; DBF_EVENT(6, "xBREDid:"); - start_block = req->sector >> TAPEBLOCK_HSEC_S2B; + start_block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B; DBF_EVENT(6, "start_block = %i\n", start_block); rq_for_each_segment(bv, req, iter) diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 86596d3813b..5d035e4939d 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -87,7 +87,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data) if (ccw_req->rc == 0) /* Update position. */ device->blk_data.block_position = - (req->sector + req->nr_sectors) >> TAPEBLOCK_HSEC_S2B; + (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B; else /* We lost the position information due to an error. */ device->blk_data.block_position = -1; diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 09617884a50..2132c906e53 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -188,8 +188,8 @@ static void jsfd_do_request(struct request_queue *q) while ((req = elv_next_request(q)) != NULL) { struct jsfd_part *jdp = req->rq_disk->private_data; - unsigned long offset = req->sector << 9; - size_t len = req->current_nr_sectors << 9; + unsigned long offset = blk_rq_pos(req) << 9; + size_t len = blk_rq_cur_sectors(req) << 9; if ((offset + len) > jdp->dsize) { __blk_end_request_cur(req, -EIO); diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c index be5099dd94b..c7076ce25e2 100644 --- a/drivers/scsi/eata.c +++ b/drivers/scsi/eata.c @@ -1825,7 +1825,7 @@ static int eata2x_queuecommand(struct scsi_cmnd *SCpnt, if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) { ha->cp_stat[i] = READY; - flush_dev(SCpnt->device, SCpnt->request->sector, ha, 0); + flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 0); return 0; } @@ -2144,13 +2144,13 @@ static int reorder(struct hostdata *ha, unsigned long cursec, if (!cpp->din) input_only = 0; - if (SCpnt->request->sector < minsec) - minsec = SCpnt->request->sector; - if (SCpnt->request->sector > maxsec) - maxsec = SCpnt->request->sector; + if (blk_rq_pos(SCpnt->request) < minsec) + minsec = blk_rq_pos(SCpnt->request); + if (blk_rq_pos(SCpnt->request) > maxsec) + maxsec = blk_rq_pos(SCpnt->request); - sl[n] = SCpnt->request->sector; - ioseek += SCpnt->request->nr_sectors; + sl[n] = blk_rq_pos(SCpnt->request); + ioseek += blk_rq_sectors(SCpnt->request); if (!n) continue; @@ -2190,7 +2190,7 @@ static int reorder(struct hostdata *ha, unsigned long cursec, k = il[n]; cpp = &ha->cp[k]; SCpnt = cpp->SCpnt; - ll[n] = SCpnt->request->nr_sectors; + ll[n] = blk_rq_sectors(SCpnt->request); pl[n] = SCpnt->serial_number; if (!n) @@ -2236,12 +2236,12 @@ static int reorder(struct hostdata *ha, unsigned long cursec, cpp = &ha->cp[k]; SCpnt = cpp->SCpnt; scmd_printk(KERN_INFO, SCpnt, - "%s pid %ld mb %d fc %d nr %d sec %ld ns %ld" + "%s pid %ld mb %d fc %d nr %d sec %ld ns %u" " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n", (ihdlr ? "ihdlr" : "qcomm"), SCpnt->serial_number, k, flushcount, - n_ready, SCpnt->request->sector, - SCpnt->request->nr_sectors, cursec, YESNO(s), + n_ready, blk_rq_pos(SCpnt->request), + blk_rq_sectors(SCpnt->request), cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only), YESNO(overlap), cpp->din); } @@ -2408,7 +2408,7 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost) if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) - flush_dev(SCpnt->device, SCpnt->request->sector, ha, 1); + flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 1); tstatus = status_byte(spp->target_status); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 36fd2e75da1..a8fab397711 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1313,10 +1313,10 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd, uint32_t bgstat = bgf->bgstat; uint64_t failing_sector = 0; - printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%lx " + printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%x " "bgstat=0x%x bghm=0x%x\n", cmd->cmnd[0], (unsigned long long)scsi_get_lba(cmd), - cmd->request->nr_sectors, bgstat, bghm); + blk_rq_sectors(cmd->request), bgstat, bghm); spin_lock(&_dump_buf_lock); if (!_dump_buf_done) { @@ -2375,15 +2375,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *)) if (cmnd->cmnd[0] == READ_10) lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, "9035 BLKGRD: READ @ sector %llu, " - "count %lu\n", - (unsigned long long)scsi_get_lba(cmnd), - cmnd->request->nr_sectors); + "count %u\n", + (unsigned long long)scsi_get_lba(cmnd), + blk_rq_sectors(cmnd->request)); else if (cmnd->cmnd[0] == WRITE_10) lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, "9036 BLKGRD: WRITE @ sector %llu, " - "count %lu cmd=%p\n", + "count %u cmd=%p\n", (unsigned long long)scsi_get_lba(cmnd), - cmnd->request->nr_sectors, + blk_rq_sectors(cmnd->request), cmnd); err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd); @@ -2403,15 +2403,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *)) if (cmnd->cmnd[0] == READ_10) lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, "9040 dbg: READ @ sector %llu, " - "count %lu\n", + "count %u\n", (unsigned long long)scsi_get_lba(cmnd), - cmnd->request->nr_sectors); + blk_rq_sectors(cmnd->request)); else if (cmnd->cmnd[0] == WRITE_10) lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, "9041 dbg: WRITE @ sector %llu, " - "count %lu cmd=%p\n", + "count %u cmd=%p\n", (unsigned long long)scsi_get_lba(cmnd), - cmnd->request->nr_sectors, cmnd); + blk_rq_sectors(cmnd->request), cmnd); else lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, "9042 dbg: parser not implemented\n"); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ff0ca9988a..39b3acfc0dd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -787,9 +787,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * Next deal with any sectors which we were able to correctly * handle. */ - SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, " + SCSI_LOG_HLCOMPLETE(1, printk("%u sectors total, " "%d bytes done.\n", - req->nr_sectors, good_bytes)); + blk_rq_sectors(req), good_bytes)); /* * Recovered errors need reporting, but they're always treated @@ -968,7 +968,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, if (blk_pc_request(req)) sdb->length = req->data_len; else - sdb->length = req->nr_sectors << 9; + sdb->length = blk_rq_sectors(req) << 9; return BLKPREP_OK; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3fcb64b91c4..70c4dd99bbf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -383,9 +383,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) struct scsi_device *sdp = q->queuedata; struct gendisk *disk = rq->rq_disk; struct scsi_disk *sdkp; - sector_t block = rq->sector; + sector_t block = blk_rq_pos(rq); sector_t threshold; - unsigned int this_count = rq->nr_sectors; + unsigned int this_count = blk_rq_sectors(rq); int ret, host_dif; if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -412,10 +412,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) this_count)); if (!sdp || !scsi_device_online(sdp) || - block + rq->nr_sectors > get_capacity(disk)) { + block + blk_rq_sectors(rq) > get_capacity(disk)) { SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, - "Finishing %ld sectors\n", - rq->nr_sectors)); + "Finishing %u sectors\n", + blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "Retry with 0x%p\n", SCpnt)); goto out; @@ -462,7 +462,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) * for this. */ if (sdp->sector_size == 1024) { - if ((block & 1) || (rq->nr_sectors & 1)) { + if ((block & 1) || (blk_rq_sectors(rq) & 1)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); goto out; @@ -472,7 +472,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) } } if (sdp->sector_size == 2048) { - if ((block & 3) || (rq->nr_sectors & 3)) { + if ((block & 3) || (blk_rq_sectors(rq) & 3)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); goto out; @@ -482,7 +482,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) } } if (sdp->sector_size == 4096) { - if ((block & 7) || (rq->nr_sectors & 7)) { + if ((block & 7) || (blk_rq_sectors(rq) & 7)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); goto out; @@ -511,10 +511,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) } SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, - "%s %d/%ld 512 byte blocks.\n", + "%s %d/%u 512 byte blocks.\n", (rq_data_dir(rq) == WRITE) ? "writing" : "reading", this_count, - rq->nr_sectors)); + blk_rq_sectors(rq))); /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */ host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); @@ -970,8 +970,8 @@ static struct block_device_operations sd_fops = { static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) { - u64 start_lba = scmd->request->sector; - u64 end_lba = scmd->request->sector + (scsi_bufflen(scmd) / 512); + u64 start_lba = blk_rq_pos(scmd->request); + u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512); u64 bad_lba; int info_valid; diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 184dff49279..82f14a9482d 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -507,7 +507,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) sector_sz = scmd->device->sector_size; sectors = good_bytes / sector_sz; - phys = scmd->request->sector & 0xffffffff; + phys = blk_rq_pos(scmd->request) & 0xffffffff; if (sector_sz == 4096) phys >>= 3; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0e1a0f2d2ad..fddba53c7fe 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -292,7 +292,8 @@ static int sr_done(struct scsi_cmnd *SCpnt) if (cd->device->sector_size == 2048) error_sector <<= 2; error_sector &= ~(block_sectors - 1); - good_bytes = (error_sector - SCpnt->request->sector) << 9; + good_bytes = (error_sector - + blk_rq_pos(SCpnt->request)) << 9; if (good_bytes < 0 || good_bytes >= this_count) good_bytes = 0; /* @@ -349,8 +350,8 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) cd->disk->disk_name, block)); if (!cd->device || !scsi_device_online(cd->device)) { - SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", - rq->nr_sectors)); + SCSI_LOG_HLQUEUE(2, printk("Finishing %u sectors\n", + blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); goto out; } @@ -413,7 +414,7 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) /* * request doesn't start on hw block boundary, add scatter pads */ - if (((unsigned int)rq->sector % (s_size >> 9)) || + if (((unsigned int)blk_rq_pos(rq) % (s_size >> 9)) || (scsi_bufflen(SCpnt) % s_size)) { scmd_printk(KERN_NOTICE, SCpnt, "unaligned transfer\n"); goto out; @@ -422,14 +423,14 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) this_count = (scsi_bufflen(SCpnt) >> 9) / (s_size >> 9); - SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", + SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%u 512 byte blocks.\n", cd->cdi.name, (rq_data_dir(rq) == WRITE) ? "writing" : "reading", - this_count, rq->nr_sectors)); + this_count, blk_rq_sectors(rq))); SCpnt->cmnd[1] = 0; - block = (unsigned int)rq->sector / (s_size >> 9); + block = (unsigned int)blk_rq_pos(rq) / (s_size >> 9); if (this_count > 0xffff) { this_count = 0xffff; diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c index 601e95141cb..54023d41fd1 100644 --- a/drivers/scsi/u14-34f.c +++ b/drivers/scsi/u14-34f.c @@ -1306,7 +1306,7 @@ static int u14_34f_queuecommand(struct scsi_cmnd *SCpnt, void (*done)(struct scs if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) { HD(j)->cp_stat[i] = READY; - flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE); + flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, FALSE); return 0; } @@ -1610,11 +1610,13 @@ static int reorder(unsigned int j, unsigned long cursec, if (!(cpp->xdir == DTD_IN)) input_only = FALSE; - if (SCpnt->request->sector < minsec) minsec = SCpnt->request->sector; - if (SCpnt->request->sector > maxsec) maxsec = SCpnt->request->sector; + if (blk_rq_pos(SCpnt->request) < minsec) + minsec = blk_rq_pos(SCpnt->request); + if (blk_rq_pos(SCpnt->request) > maxsec) + maxsec = blk_rq_pos(SCpnt->request); - sl[n] = SCpnt->request->sector; - ioseek += SCpnt->request->nr_sectors; + sl[n] = blk_rq_pos(SCpnt->request); + ioseek += blk_rq_sectors(SCpnt->request); if (!n) continue; @@ -1642,7 +1644,7 @@ static int reorder(unsigned int j, unsigned long cursec, if (!input_only) for (n = 0; n < n_ready; n++) { k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt; - ll[n] = SCpnt->request->nr_sectors; pl[n] = SCpnt->serial_number; + ll[n] = blk_rq_sectors(SCpnt->request); pl[n] = SCpnt->serial_number; if (!n) continue; @@ -1666,12 +1668,12 @@ static int reorder(unsigned int j, unsigned long cursec, if (link_statistics && (overlap || !(flushcount % link_statistics))) for (n = 0; n < n_ready; n++) { k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt; - printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %ld"\ + printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %u"\ " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n", (ihdlr ? "ihdlr" : "qcomm"), SCpnt->channel, SCpnt->target, SCpnt->lun, SCpnt->serial_number, k, flushcount, n_ready, - SCpnt->request->sector, SCpnt->request->nr_sectors, cursec, - YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only), + blk_rq_pos(SCpnt->request), blk_rq_sectors(SCpnt->request), + cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only), YESNO(overlap), cpp->xdir); } #endif @@ -1799,7 +1801,7 @@ static irqreturn_t ihdlr(unsigned int j) if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) - flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE); + flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, TRUE); tstatus = status_byte(spp->target_status); diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 43b50d36925..3878d1dc7f5 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -270,7 +270,7 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd) static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) { - return scmd->request->sector; + return blk_rq_pos(scmd->request); } static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd) -- cgit v1.2.3-70-g09d2 From 2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:41 +0900 Subject: block: drop request->hard_* and *nr_sectors struct request has had a few different ways to represent some properties of a request. ->hard_* represent block layer's view of the request progress (completion cursor) and the ones without the prefix are supposed to represent the issue cursor and allowed to be updated as necessary by the low level drivers. The thing is that as block layer supports partial completion, the two cursors really aren't necessary and only cause confusion. In addition, manual management of request detail from low level drivers is cumbersome and error-prone at the very least. Another interesting duplicate fields are rq->[hard_]nr_sectors and rq->{hard_cur|current}_nr_sectors against rq->data_len and rq->bio->bi_size. This is more convoluted than the hard_ case. rq->[hard_]nr_sectors are initialized for requests with bio but blk_rq_bytes() uses it only for !pc requests. rq->data_len is initialized for all request but blk_rq_bytes() uses it only for pc requests. This causes good amount of confusion throughout block layer and its drivers and determining the request length has been a bit of black magic which may or may not work depending on circumstances and what the specific LLD is actually doing. rq->{hard_cur|current}_nr_sectors represent the number of sectors in the contiguous data area at the front. This is mainly used by drivers which transfers data by walking request segment-by-segment. This value always equals rq->bio->bi_size >> 9. However, data length for pc requests may not be multiple of 512 bytes and using this field becomes a bit confusing. In general, having multiple fields to represent the same property leads only to confusion and subtle bugs. With recent block low level driver cleanups, no driver is accessing or manipulating these duplicate fields directly. Drop all the duplicates. Now rq->sector means the current sector, rq->data_len the current total length and rq->bio->bi_size the current segment length. Everything else is defined in terms of these three and available only through accessors. * blk_recalc_rq_sectors() is collapsed into blk_update_request() and now handles pc and fs requests equally other than rq->sector update. This means that now pc requests can use partial completion too (no in-kernel user yet tho). * bio_cur_sectors() is replaced with bio_cur_bytes() as block layer now uses byte count as the primary data length. * blk_rq_pos() is now guranteed to be always correct. In-block users converted. * blk_rq_bytes() is now guaranteed to be always valid as is blk_rq_sectors(). In-block users converted. * blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9. More convenient one is used. * blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const pointer to request. [ Impact: API cleanup, single way to represent one property of a request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 81 ++++++++++++++++++------------------------------ block/blk-merge.c | 36 +++------------------ block/blk.h | 1 - block/cfq-iosched.c | 10 +++--- include/linux/bio.h | 6 ++-- include/linux/blkdev.h | 37 ++++++++++------------ include/linux/elevator.h | 2 +- kernel/trace/blktrace.c | 16 +++++----- 8 files changed, 67 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 82dc20621c0..3596ca71909 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; - rq->sector = rq->hard_sector = (sector_t) -1; + rq->sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; @@ -189,8 +189,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", - rq->bio, rq->biotail, - rq->buffer, rq->data_len); + rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); @@ -1096,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; - req->hard_sector = req->sector = bio->bi_sector; + req->sector = bio->bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1113,14 +1112,13 @@ static inline bool queue_should_plug(struct request_queue *q) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors; + int el_ret; + unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); const int unplug = bio_unplug(bio); int rw_flags; - nr_sectors = bio_sectors(bio); - /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1145,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->biotail->bi_next = bio; req->biotail = bio; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1171,10 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->current_nr_sectors = bio_cur_sectors(bio); - req->hard_cur_sectors = req->current_nr_sectors; - req->sector = req->hard_sector = bio->bi_sector; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->sector = bio->bi_sector; + req->data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1557,7 +1553,7 @@ EXPORT_SYMBOL(submit_bio); int blk_rq_check_limits(struct request_queue *q, struct request *rq) { if (blk_rq_sectors(rq) > q->max_sectors || - rq->data_len > q->max_hw_sectors << 9) { + blk_rq_bytes(rq) > q->max_hw_sectors << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -1675,35 +1671,6 @@ static void blk_account_io_done(struct request *req) } } -/** - * blk_rq_bytes - Returns bytes left to complete in the entire request - * @rq: the request being processed - **/ -unsigned int blk_rq_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return blk_rq_sectors(rq) << 9; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_bytes); - -/** - * blk_rq_cur_bytes - Returns bytes left to complete in the current segment - * @rq: the request being processed - **/ -unsigned int blk_rq_cur_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->current_nr_sectors << 9; - - if (rq->bio) - return rq->bio->bi_size; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); - struct request *elv_next_request(struct request_queue *q) { struct request *rq; @@ -1736,7 +1703,7 @@ struct request *elv_next_request(struct request_queue *q) if (rq->cmd_flags & REQ_DONTPREP) break; - if (q->dma_drain_size && rq->data_len) { + if (q->dma_drain_size && blk_rq_bytes(rq)) { /* * make sure space for the drain appears we * know we can do this because max_hw_segments @@ -1759,7 +1726,7 @@ struct request *elv_next_request(struct request_queue *q) * avoid resource deadlock. REQ_STARTED will * prevent other fs requests from passing this one. */ - if (q->dma_drain_size && rq->data_len && + if (q->dma_drain_size && blk_rq_bytes(rq) && !(rq->cmd_flags & REQ_DONTPREP)) { /* * remove the space for the drain we added @@ -1911,8 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * can find how many bytes remain in the request * later. */ - req->nr_sectors = req->hard_nr_sectors = 0; - req->current_nr_sectors = req->hard_cur_sectors = 0; + req->data_len = 0; return false; } @@ -1926,8 +1892,25 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) bio_iovec(bio)->bv_len -= nr_bytes; } - blk_recalc_rq_sectors(req, total_bytes >> 9); + req->data_len -= total_bytes; + req->buffer = bio_data(req->bio); + + /* update sector only for requests with clear definition of sector */ + if (blk_fs_request(req) || blk_discard_rq(req)) + req->sector += total_bytes >> 9; + + /* + * If total number of sectors is less than the first segment + * size, something has gone terribly wrong. + */ + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { + printk(KERN_ERR "blk: request botched\n"); + req->data_len = blk_rq_cur_bytes(req); + } + + /* recalculate the number of segments */ blk_recalc_rq_segments(req); + return true; } EXPORT_SYMBOL_GPL(blk_update_request); @@ -2049,11 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->nr_phys_segments = bio_phys_segments(q, bio); rq->buffer = bio_data(bio); } - rq->current_nr_sectors = bio_cur_sectors(bio); - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); rq->data_len = bio->bi_size; - rq->bio = rq->biotail = bio; if (bio->bi_bdev) diff --git a/block/blk-merge.c b/block/blk-merge.c index bf62a87a9da..b8df66aef0f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -9,35 +9,6 @@ #include "blk.h" -void blk_recalc_rq_sectors(struct request *rq, int nsect) -{ - if (blk_fs_request(rq) || blk_discard_rq(rq)) { - rq->hard_sector += nsect; - rq->hard_nr_sectors -= nsect; - - /* - * Move the I/O submission pointers ahead if required. - */ - if ((rq->nr_sectors >= rq->hard_nr_sectors) && - (rq->sector <= rq->hard_sector)) { - rq->sector = rq->hard_sector; - rq->nr_sectors = rq->hard_nr_sectors; - rq->hard_cur_sectors = bio_cur_sectors(rq->bio); - rq->current_nr_sectors = rq->hard_cur_sectors; - rq->buffer = bio_data(rq->bio); - } - - /* - * if total number of sectors is less than the first segment - * size, something has gone terribly wrong - */ - if (rq->nr_sectors < rq->current_nr_sectors) { - printk(KERN_ERR "blk: request botched\n"); - rq->nr_sectors = rq->current_nr_sectors; - } - } -} - static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { @@ -199,8 +170,9 @@ new_segment: if (unlikely(rq->cmd_flags & REQ_COPY_USER) && - (rq->data_len & q->dma_pad_mask)) { - unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1; + (blk_rq_bytes(rq) & q->dma_pad_mask)) { + unsigned int pad_len = + (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; sg->length += pad_len; rq->extra_len += pad_len; @@ -398,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = next->bio; req->biotail = next->biotail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + req->data_len += blk_rq_bytes(next); elv_merge_requests(q, req, next); diff --git a/block/blk.h b/block/blk.h index 51115599df9..ab54529103c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -101,7 +101,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, int attempt_back_merge(struct request_queue *q, struct request *rq); int attempt_front_merge(struct request_queue *q, struct request *rq); void blk_recalc_rq_segments(struct request *rq); -void blk_recalc_rq_sectors(struct request *rq, int nsect); void blk_queue_congestion_threshold(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index db4d990a66b..99ac4304d71 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -579,9 +579,9 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, * Sort strictly based on sector. Smallest to the left, * largest to the right. */ - if (sector > cfqq->next_rq->sector) + if (sector > blk_rq_pos(cfqq->next_rq)) n = &(*p)->rb_right; - else if (sector < cfqq->next_rq->sector) + else if (sector < blk_rq_pos(cfqq->next_rq)) n = &(*p)->rb_left; else break; @@ -611,8 +611,8 @@ static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) return; cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector, - &parent, &p); + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, + blk_rq_pos(cfqq->next_rq), &parent, &p); if (!__cfqq) { rb_link_node(&cfqq->p_node, parent, p); rb_insert_color(&cfqq->p_node, cfqq->p_root); @@ -996,7 +996,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, if (cfq_rq_close(cfqd, __cfqq->next_rq)) return __cfqq; - if (__cfqq->next_rq->sector < sector) + if (blk_rq_pos(__cfqq->next_rq) < sector) node = rb_next(&__cfqq->p_node); else node = rb_prev(&__cfqq->p_node); diff --git a/include/linux/bio.h b/include/linux/bio.h index f37ca8c726b..d30ec6f30dd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -218,12 +218,12 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) -static inline unsigned int bio_cur_sectors(struct bio *bio) +static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) - return bio_iovec(bio)->bv_len >> 9; + return bio_iovec(bio)->bv_len; else /* dataless requests such as discard */ - return bio->bi_size >> 9; + return bio->bi_size; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4e5f8559872..ce2bf5efa9b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,19 +166,8 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - /* Maintain bio traversal state for part by part I/O submission. - * hard_* are block layer internals, no driver should touch them! - */ - - sector_t sector; /* next sector to submit */ - sector_t hard_sector; /* next sector to complete */ - unsigned long nr_sectors; /* no. of sectors left to submit */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ - /* no. of sectors left to submit in the current segment */ - unsigned int current_nr_sectors; - - /* no. of sectors left to complete in the current segment */ - unsigned int hard_cur_sectors; + sector_t sector; /* sector cursor */ + unsigned int data_len; /* total data len, don't access directly */ struct bio *bio; struct bio *biotail; @@ -226,7 +215,6 @@ struct request { unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; - unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; unsigned int resid_len; /* residual count */ @@ -840,20 +828,27 @@ extern void blkdev_dequeue_request(struct request *req); */ static inline sector_t blk_rq_pos(const struct request *rq) { - return rq->hard_sector; + return rq->sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->data_len; } -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + return rq->bio ? bio_cur_bytes(rq->bio) : 0; +} static inline unsigned int blk_rq_sectors(const struct request *rq) { - return rq->hard_nr_sectors; + return blk_rq_bytes(rq) >> 9; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { - return rq->hard_cur_sectors; + return blk_rq_cur_bytes(rq) >> 9; } /* @@ -928,7 +923,7 @@ static inline void blk_end_request_all(struct request *rq, int error) */ static inline bool blk_end_request_cur(struct request *rq, int error) { - return blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } /** @@ -981,7 +976,7 @@ static inline void __blk_end_request_all(struct request *rq, int error) */ static inline bool __blk_end_request_cur(struct request *rq, int error) { - return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } extern void blk_complete_request(struct request *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c59b769f62b..4e462878c9c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -171,7 +171,7 @@ enum { ELV_MQUEUE_MUST, }; -#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 42f1c11e754..5708a14bee5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -642,12 +642,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - rq->cmd_len, rq->cmd); + __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, + what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, - rw, what, rq->errors, 0, NULL); + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, + what, rq->errors, 0, NULL); } } @@ -854,11 +854,11 @@ void blk_add_driver_data(struct request_queue *q, return; if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); + __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, + BLK_TA_DRV_DATA, rq->errors, len, data); else - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, + BLK_TA_DRV_DATA, rq->errors, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3-70-g09d2 From a2dec7b36364a5cc564c4d76cf16d2e7d33f5c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:44 +0900 Subject: block: hide request sector and data_len Block low level drivers for some reason have been pretty good at abusing block layer API. Especially struct request's fields tend to get violated in all possible ways. Make it clear that low level drivers MUST NOT access or manipulate rq->sector and rq->data_len directly by prefixing them with double underscores. This change is also necessary to break build of out-of-tree codes which assume the previous block API where internal fields can be manipulated and rq->data_len carries residual count on completion. [ Impact: hide internal fields, block API change ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++++++++---------- block/blk-map.c | 2 +- block/blk-merge.c | 2 +- include/linux/blkdev.h | 9 +++++---- 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 3596ca71909..6226a380fb6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; - rq->sector = (sector_t) -1; + rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; @@ -1095,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; - req->sector = bio->bi_sector; + req->__sector = bio->bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1143,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->biotail->bi_next = bio; req->biotail = bio; - req->data_len += bytes; + req->__data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1169,8 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->sector = bio->bi_sector; - req->data_len += bytes; + req->__sector = bio->bi_sector; + req->__data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1878,7 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * can find how many bytes remain in the request * later. */ - req->data_len = 0; + req->__data_len = 0; return false; } @@ -1892,12 +1892,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) bio_iovec(bio)->bv_len -= nr_bytes; } - req->data_len -= total_bytes; + req->__data_len -= total_bytes; req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ if (blk_fs_request(req) || blk_discard_rq(req)) - req->sector += total_bytes >> 9; + req->__sector += total_bytes >> 9; /* * If total number of sectors is less than the first segment @@ -1905,7 +1905,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) */ if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { printk(KERN_ERR "blk: request botched\n"); - req->data_len = blk_rq_cur_bytes(req); + req->__data_len = blk_rq_cur_bytes(req); } /* recalculate the number of segments */ @@ -2032,7 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->nr_phys_segments = bio_phys_segments(q, bio); rq->buffer = bio_data(bio); } - rq->data_len = bio->bi_size; + rq->__data_len = bio->bi_size; rq->bio = rq->biotail = bio; if (bio->bi_bdev) diff --git a/block/blk-map.c b/block/blk-map.c index 694fefad34e..56082bea450 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, rq->biotail->bi_next = bio; rq->biotail = bio; - rq->data_len += bio->bi_size; + rq->__data_len += bio->bi_size; } return 0; } diff --git a/block/blk-merge.c b/block/blk-merge.c index b8df66aef0f..4974dd5767e 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -370,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = next->bio; req->biotail = next->biotail; - req->data_len += blk_rq_bytes(next); + req->__data_len += blk_rq_bytes(next); elv_merge_requests(q, req, next); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ce2bf5efa9b..c7558034570 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,8 +166,9 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - sector_t sector; /* sector cursor */ - unsigned int data_len; /* total data len, don't access directly */ + /* the following two fields are internal, NEVER access directly */ + sector_t __sector; /* sector cursor */ + unsigned int __data_len; /* total data len */ struct bio *bio; struct bio *biotail; @@ -828,12 +829,12 @@ extern void blkdev_dequeue_request(struct request *req); */ static inline sector_t blk_rq_pos(const struct request *rq) { - return rq->sector; + return rq->__sector; } static inline unsigned int blk_rq_bytes(const struct request *rq) { - return rq->data_len; + return rq->__data_len; } static inline int blk_rq_cur_bytes(const struct request *rq) -- cgit v1.2.3-70-g09d2 From 9934c8c04561413609d2bc38c6b9f268cba774a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 May 2009 11:54:16 +0900 Subject: block: implement and enforce request peek/start/fetch Till now block layer allowed two separate modes of request execution. A request is always acquired from the request queue via elv_next_request(). After that, drivers are free to either dequeue it or process it without dequeueing. Dequeue allows elv_next_request() to return the next request so that multiple requests can be in flight. Executing requests without dequeueing has its merits mostly in allowing drivers for simpler devices which can't do sg to deal with segments only without considering request boundary. However, the benefit this brings is dubious and declining while the cost of the API ambiguity is increasing. Segment based drivers are usually for very old or limited devices and as converting to dequeueing model isn't difficult, it doesn't justify the API overhead it puts on block layer and its more modern users. Previous patches converted all block low level drivers to dequeueing model. This patch completes the API transition by... * renaming elv_next_request() to blk_peek_request() * renaming blkdev_dequeue_request() to blk_start_request() * adding blk_fetch_request() which is combination of peek and start * disallowing completion of queued (not started) requests * applying new API to all LLDs Renamings are for consistency and to break out of tree code so that it's apparent that out of tree drivers need updating. [ Impact: block request issue API cleanup, no functional change ] Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: James Bottomley Cc: Mike Miller Cc: unsik Kim Cc: Paul Clements Cc: Tim Waugh Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Laurent Vivier Cc: Jeff Garzik Cc: Jeremy Fitzhardinge Cc: Grant Likely Cc: Adrian McMenamin Cc: Stephen Rothwell Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: Sergei Shtylyov Cc: Alex Dubov Cc: Pierre Ossman Cc: David Woodhouse Cc: Markus Lidel Cc: Stefan Weinhuber Cc: Martin Schwidefsky Cc: Pete Zaitcev Cc: FUJITA Tomonori Signed-off-by: Jens Axboe --- arch/arm/plat-omap/mailbox.c | 12 ++--- arch/um/drivers/ubd_kern.c | 3 +- block/blk-barrier.c | 4 +- block/blk-core.c | 105 ++++++++++++++++++++++++++---------- block/blk-tag.c | 2 +- block/blk.h | 1 + drivers/block/DAC960.c | 4 +- drivers/block/amiflop.c | 3 +- drivers/block/ataflop.c | 3 +- drivers/block/cciss.c | 4 +- drivers/block/cpqarray.c | 4 +- drivers/block/floppy.c | 6 +-- drivers/block/hd.c | 3 +- drivers/block/mg_disk.c | 12 ++--- drivers/block/nbd.c | 4 +- drivers/block/paride/pcd.c | 3 +- drivers/block/paride/pd.c | 7 +-- drivers/block/paride/pf.c | 3 +- drivers/block/ps3disk.c | 4 +- drivers/block/sunvdc.c | 3 +- drivers/block/swim.c | 12 ++--- drivers/block/swim3.c | 3 +- drivers/block/sx8.c | 8 ++- drivers/block/ub.c | 8 +-- drivers/block/viodasd.c | 4 +- drivers/block/virtio_blk.c | 4 +- drivers/block/xd.c | 12 ++--- drivers/block/xen-blkfront.c | 4 +- drivers/block/xsysace.c | 10 ++-- drivers/block/z2ram.c | 12 ++--- drivers/cdrom/gdrom.c | 4 +- drivers/cdrom/viocd.c | 4 +- drivers/ide/ide-atapi.c | 2 +- drivers/ide/ide-io.c | 9 ++-- drivers/memstick/core/mspro_block.c | 9 ++-- drivers/message/i2o/i2o_block.c | 6 +-- drivers/mmc/card/queue.c | 11 ++-- drivers/mtd/mtd_blkdevs.c | 7 +-- drivers/s390/block/dasd.c | 16 ++---- drivers/s390/char/tape_block.c | 7 +-- drivers/sbus/char/jsflash.c | 12 ++--- drivers/scsi/scsi_lib.c | 10 ++-- drivers/scsi/scsi_transport_sas.c | 4 +- include/linux/blkdev.h | 9 +++- include/linux/elevator.h | 2 - 45 files changed, 172 insertions(+), 207 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index 7a1f5c25fd1..40424edae93 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -197,9 +197,7 @@ static void mbox_tx_work(struct work_struct *work) struct omap_msg_tx_data *tx_data; spin_lock(q->queue_lock); - rq = elv_next_request(q); - if (rq) - blkdev_dequeue_request(rq); + rq = blk_fetch_request(q); spin_unlock(q->queue_lock); if (!rq) @@ -242,9 +240,7 @@ static void mbox_rx_work(struct work_struct *work) while (1) { spin_lock_irqsave(q->queue_lock, flags); - rq = elv_next_request(q); - if (rq) - blkdev_dequeue_request(rq); + rq = blk_fetch_request(q); spin_unlock_irqrestore(q->queue_lock, flags); if (!rq) break; @@ -351,9 +347,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf) while (1) { spin_lock_irqsave(q->queue_lock, flags); - rq = elv_next_request(q); - if (rq) - blkdev_dequeue_request(rq); + rq = blk_fetch_request(q); spin_unlock_irqrestore(q->queue_lock, flags); if (!rq) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 402ba8f70fc..aa9e926e13d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1228,12 +1228,11 @@ static void do_ubd_request(struct request_queue *q) while(1){ struct ubd *dev = q->queuedata; if(dev->end_sg == 0){ - struct request *req = elv_next_request(q); + struct request *req = blk_fetch_request(q); if(req == NULL) return; dev->request = req; - blkdev_dequeue_request(req); dev->start_sg = 0; dev->end_sg = blk_rq_map_sg(q, req, dev->sg); } diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 8713c2fbc4f..0ab81a0a750 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -180,7 +180,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } /* stash away the original request */ - elv_dequeue_request(q, rq); + blk_dequeue_request(rq); q->orig_bar_rq = rq; rq = NULL; @@ -248,7 +248,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) * Queue ordering not supported. Terminate * with prejudice. */ - elv_dequeue_request(q, rq); + blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); *rqp = NULL; return false; diff --git a/block/blk-core.c b/block/blk-core.c index 6226a380fb6..93691d2ac5a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -902,6 +902,8 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_requeue_request(struct request_queue *q, struct request *rq) { + BUG_ON(blk_queued_rq(rq)); + blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); @@ -1610,28 +1612,6 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); -/** - * blkdev_dequeue_request - dequeue request and start timeout timer - * @req: request to dequeue - * - * Dequeue @req and start timeout timer on it. This hands off the - * request to the driver. - * - * Block internal functions which don't want to start timer should - * call elv_dequeue_request(). - */ -void blkdev_dequeue_request(struct request *req) -{ - elv_dequeue_request(req->q, req); - - /* - * We are now handing the request to the hardware, add the - * timeout handler. - */ - blk_add_timer(req); -} -EXPORT_SYMBOL(blkdev_dequeue_request); - static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (blk_do_io_stat(req)) { @@ -1671,7 +1651,23 @@ static void blk_account_io_done(struct request *req) } } -struct request *elv_next_request(struct request_queue *q) +/** + * blk_peek_request - peek at the top of a request queue + * @q: request queue to peek at + * + * Description: + * Return the request at the top of @q. The returned request + * should be started using blk_start_request() before LLD starts + * processing it. + * + * Return: + * Pointer to the request at the top of @q if available. Null + * otherwise. + * + * Context: + * queue_lock must be held. + */ +struct request *blk_peek_request(struct request_queue *q) { struct request *rq; int ret; @@ -1748,10 +1744,12 @@ struct request *elv_next_request(struct request_queue *q) return rq; } -EXPORT_SYMBOL(elv_next_request); +EXPORT_SYMBOL(blk_peek_request); -void elv_dequeue_request(struct request_queue *q, struct request *rq) +void blk_dequeue_request(struct request *rq) { + struct request_queue *q = rq->q; + BUG_ON(list_empty(&rq->queuelist)); BUG_ON(ELV_ON_HASH(rq)); @@ -1766,6 +1764,58 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) q->in_flight++; } +/** + * blk_start_request - start request processing on the driver + * @req: request to dequeue + * + * Description: + * Dequeue @req and start timeout timer on it. This hands off the + * request to the driver. + * + * Block internal functions which don't want to start timer should + * call blk_dequeue_request(). + * + * Context: + * queue_lock must be held. + */ +void blk_start_request(struct request *req) +{ + blk_dequeue_request(req); + + /* + * We are now handing the request to the hardware, add the + * timeout handler. + */ + blk_add_timer(req); +} +EXPORT_SYMBOL(blk_start_request); + +/** + * blk_fetch_request - fetch a request from a request queue + * @q: request queue to fetch a request from + * + * Description: + * Return the request at the top of @q. The request is started on + * return and LLD can start processing it immediately. + * + * Return: + * Pointer to the request at the top of @q if available. Null + * otherwise. + * + * Context: + * queue_lock must be held. + */ +struct request *blk_fetch_request(struct request_queue *q) +{ + struct request *rq; + + rq = blk_peek_request(q); + if (rq) + blk_start_request(rq); + return rq; +} +EXPORT_SYMBOL(blk_fetch_request); + /** * blk_update_request - Special helper function for request stacking drivers * @rq: the request being processed @@ -1937,12 +1987,11 @@ static bool blk_update_bidi_request(struct request *rq, int error, */ static void blk_finish_request(struct request *req, int error) { + BUG_ON(blk_queued_rq(req)); + if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); - if (blk_queued_rq(req)) - elv_dequeue_request(req->q, req); - if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); diff --git a/block/blk-tag.c b/block/blk-tag.c index 3c518e3303a..c260f7c30dd 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -374,7 +374,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; - blkdev_dequeue_request(rq); + blk_start_request(rq); list_add(&rq->queuelist, &q->tag_busy_list); return 0; } diff --git a/block/blk.h b/block/blk.h index ab54529103c..9e0042ca949 100644 --- a/block/blk.h +++ b/block/blk.h @@ -13,6 +13,7 @@ extern struct kobj_type blk_queue_ktype; void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); +void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); void blk_unplug_work(struct work_struct *work); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 774ab05973a..668dc234b8e 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3321,7 +3321,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_ DAC960_Command_T *Command; while(1) { - Request = elv_next_request(req_q); + Request = blk_peek_request(req_q); if (!Request) return 1; @@ -3341,7 +3341,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_ Command->BlockNumber = blk_rq_pos(Request); Command->BlockCount = blk_rq_sectors(Request); Command->Request = Request; - blkdev_dequeue_request(Request); + blk_start_request(Request); Command->SegmentCount = blk_rq_map_sg(req_q, Command->Request, Command->cmd_sglist); /* pci_map_sg MAY change the value of SegCount */ diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 80a68b2e045..9c6e5b0fe89 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1342,12 +1342,11 @@ static void redo_fd_request(void) int err; next_req: - rq = elv_next_request(floppy_queue); + rq = blk_fetch_request(floppy_queue); if (!rq) { /* Nothing left to do */ return; } - blkdev_dequeue_request(rq); floppy = rq->rq_disk->private_data; drive = floppy - unit; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 89a591d9c83..f5e7180d7f4 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1404,10 +1404,9 @@ static void redo_fd_request(void) repeat: if (!fd_request) { - fd_request = elv_next_request(floppy_queue); + fd_request = blk_fetch_request(floppy_queue); if (!fd_request) goto the_end; - blkdev_dequeue_request(fd_request); } floppy = fd_request->rq_disk->private_data; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ab7b04c0db7..e714e7cce6f 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2801,7 +2801,7 @@ static void do_cciss_request(struct request_queue *q) goto startio; queue: - creq = elv_next_request(q); + creq = blk_peek_request(q); if (!creq) goto startio; @@ -2810,7 +2810,7 @@ static void do_cciss_request(struct request_queue *q) if ((c = cmd_alloc(h, 1)) == NULL) goto full; - blkdev_dequeue_request(creq); + blk_start_request(creq); spin_unlock_irq(q->queue_lock); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index a5caeff4718..a02dcfc00f1 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -903,7 +903,7 @@ static void do_ida_request(struct request_queue *q) goto startio; queue_next: - creq = elv_next_request(q); + creq = blk_peek_request(q); if (!creq) goto startio; @@ -912,7 +912,7 @@ queue_next: if ((c = cmd_alloc(h,1)) == NULL) goto startio; - blkdev_dequeue_request(creq); + blk_start_request(creq); c->ctlr = h->ctlr; c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index e2c70d2085a..90877fee0ee 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -931,7 +931,7 @@ static inline void unlock_fdc(void) del_timer(&fd_timeout); cont = NULL; clear_bit(0, &fdc_busy); - if (current_req || elv_next_request(floppy_queue)) + if (current_req || blk_peek_request(floppy_queue)) do_fd_request(floppy_queue); spin_unlock_irqrestore(&floppy_lock, flags); wake_up(&fdc_wait); @@ -2912,9 +2912,7 @@ static void redo_fd_request(void) struct request *req; spin_lock_irq(floppy_queue->queue_lock); - req = elv_next_request(floppy_queue); - if (req) - blkdev_dequeue_request(req); + req = blk_fetch_request(floppy_queue); spin_unlock_irq(floppy_queue->queue_lock); if (!req) { do_floppy = NULL; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 288ab63c102..961de56d00a 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -592,12 +592,11 @@ repeat: del_timer(&device_timer); if (!hd_req) { - hd_req = elv_next_request(hd_queue); + hd_req = blk_fetch_request(hd_queue); if (!hd_req) { do_hd = NULL; return; } - blkdev_dequeue_request(hd_req); } req = hd_req; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1ca5d1423fa..c0cd0a03f69 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -671,10 +671,8 @@ static void mg_request_poll(struct request_queue *q) while (1) { if (!host->req) { - host->req = elv_next_request(q); - if (host->req) - blkdev_dequeue_request(host->req); - else + host->req = blk_fetch_request(q); + if (!host->req) break; } @@ -744,10 +742,8 @@ static void mg_request(struct request_queue *q) while (1) { if (!host->req) { - host->req = elv_next_request(q); - if (host->req) - blkdev_dequeue_request(host->req); - else + host->req = blk_fetch_request(q); + if (!host->req) break; } req = host->req; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index fad167de23b..5d23ffad7c7 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -533,11 +533,9 @@ static void do_nbd_request(struct request_queue *q) { struct request *req; - while ((req = elv_next_request(q)) != NULL) { + while ((req = blk_fetch_request(q)) != NULL) { struct nbd_device *lo; - blkdev_dequeue_request(req); - spin_unlock_irq(q->queue_lock); dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 425f81586a3..911dfd98d81 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -720,10 +720,9 @@ static void do_pcd_request(struct request_queue * q) return; while (1) { if (!pcd_req) { - pcd_req = elv_next_request(q); + pcd_req = blk_fetch_request(q); if (!pcd_req) return; - blkdev_dequeue_request(pcd_req); } if (rq_data_dir(pcd_req) == READ) { diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index d2ca3f55206..bf5955b3d87 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -412,11 +412,9 @@ static void run_fsm(void) spin_lock_irqsave(&pd_lock, saved_flags); if (!__blk_end_request_cur(pd_req, res == Ok ? 0 : -EIO)) { - pd_req = elv_next_request(pd_queue); + pd_req = blk_fetch_request(pd_queue); if (!pd_req) stop = 1; - else - blkdev_dequeue_request(pd_req); } spin_unlock_irqrestore(&pd_lock, saved_flags); if (stop) @@ -706,10 +704,9 @@ static void do_pd_request(struct request_queue * q) { if (pd_req) return; - pd_req = elv_next_request(q); + pd_req = blk_fetch_request(q); if (!pd_req) return; - blkdev_dequeue_request(pd_req); schedule_fsm(); } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index d6f7bd84ed3..68a90834e99 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -762,10 +762,9 @@ static void do_pf_request(struct request_queue * q) return; repeat: if (!pf_req) { - pf_req = elv_next_request(q); + pf_req = blk_fetch_request(q); if (!pf_req) return; - blkdev_dequeue_request(pf_req); } pf_current = pf_req->rq_disk->private_data; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index f4d8db944e7..338cee4cc0b 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -194,9 +194,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - while ((req = elv_next_request(q))) { - blkdev_dequeue_request(req); - + while ((req = blk_fetch_request(q))) { if (blk_fs_request(req)) { if (ps3disk_submit_request_sg(dev, req)) break; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 9f351bfa15e..cbfd9c0aef0 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -441,12 +441,11 @@ out: static void do_vdc_request(struct request_queue *q) { while (1) { - struct request *req = elv_next_request(q); + struct request *req = blk_fetch_request(q); if (!req) break; - blkdev_dequeue_request(req); if (__send_request(req) < 0) __blk_end_request_all(req, -EIO); } diff --git a/drivers/block/swim.c b/drivers/block/swim.c index dedd4893f5e..cf7877fb8a7 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -528,10 +528,7 @@ static void redo_fd_request(struct request_queue *q) struct request *req; struct floppy_state *fs; - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - + req = blk_fetch_request(q); while (req) { int err = -EIO; @@ -554,11 +551,8 @@ static void redo_fd_request(struct request_queue *q) break; } done: - if (!__blk_end_request_cur(req, err)) { - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - } + if (!__blk_end_request_cur(req, err)) + req = blk_fetch_request(q); } } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index f48c6dd47e0..80df93e3cdd 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -326,10 +326,9 @@ static void start_request(struct floppy_state *fs) } while (fs->state == idle) { if (!fd_req) { - fd_req = elv_next_request(swim3_queue); + fd_req = blk_fetch_request(swim3_queue); if (!fd_req) break; - blkdev_dequeue_request(fd_req); } req = fd_req; #if 0 diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 087c94c8b2d..da403b6a7f4 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -810,12 +810,10 @@ static void carm_oob_rq_fn(struct request_queue *q) while (1) { DPRINTK("get req\n"); - rq = elv_next_request(q); + rq = blk_fetch_request(q); if (!rq) break; - blkdev_dequeue_request(rq); - crq = rq->special; assert(crq != NULL); assert(crq->rq == rq); @@ -846,7 +844,7 @@ static void carm_rq_fn(struct request_queue *q) queue_one_request: VPRINTK("get req\n"); - rq = elv_next_request(q); + rq = blk_peek_request(q); if (!rq) return; @@ -857,7 +855,7 @@ queue_one_request: } crq->rq = rq; - blkdev_dequeue_request(rq); + blk_start_request(rq); if (rq_data_dir(rq) == WRITE) { writing = 1; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 40d03cf63f2..178f459a50e 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -627,7 +627,7 @@ static void ub_request_fn(struct request_queue *q) struct ub_lun *lun = q->queuedata; struct request *rq; - while ((rq = elv_next_request(q)) != NULL) { + while ((rq = blk_peek_request(q)) != NULL) { if (ub_request_fn_1(lun, rq) != 0) { blk_stop_queue(q); break; @@ -643,13 +643,13 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) int n_elem; if (atomic_read(&sc->poison)) { - blkdev_dequeue_request(rq); + blk_start_request(rq); ub_end_rq(rq, DID_NO_CONNECT << 16, blk_rq_bytes(rq)); return 0; } if (lun->changed && !blk_pc_request(rq)) { - blkdev_dequeue_request(rq); + blk_start_request(rq); ub_end_rq(rq, SAM_STAT_CHECK_CONDITION, blk_rq_bytes(rq)); return 0; } @@ -660,7 +660,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) return -1; memset(cmd, 0, sizeof(struct ub_scsi_cmd)); - blkdev_dequeue_request(rq); + blk_start_request(rq); urq = &lun->urq; memset(urq, 0, sizeof(struct ub_request)); diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 2086cb12d3e..390d69bb7c4 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -361,11 +361,9 @@ static void do_viodasd_request(struct request_queue *q) * back later. */ while (num_req_outstanding < VIOMAXREQ) { - req = elv_next_request(q); + req = blk_fetch_request(q); if (req == NULL) return; - /* dequeue the current request from the queue */ - blkdev_dequeue_request(req); /* check that request contains a valid command */ if (!blk_fs_request(req)) { viodasd_end_request(req, -EIO, blk_rq_sectors(req)); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1980ab45635..29a9daf4862 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -128,7 +128,7 @@ static void do_virtblk_request(struct request_queue *q) struct request *req; unsigned int issued = 0; - while ((req = elv_next_request(q)) != NULL) { + while ((req = blk_peek_request(q)) != NULL) { vblk = req->rq_disk->private_data; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); @@ -138,7 +138,7 @@ static void do_virtblk_request(struct request_queue *q) blk_stop_queue(q); break; } - blkdev_dequeue_request(req); + blk_start_request(req); issued++; } diff --git a/drivers/block/xd.c b/drivers/block/xd.c index d4c4352354b..ce242921992 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -305,10 +305,7 @@ static void do_xd_request (struct request_queue * q) if (xdc_busy) return; - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - + req = blk_fetch_request(q); while (req) { unsigned block = blk_rq_pos(req); unsigned count = blk_rq_cur_sectors(req); @@ -325,11 +322,8 @@ static void do_xd_request (struct request_queue * q) block, count); done: /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) { - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - } + if (!__blk_end_request_cur(req, res)) + req = blk_fetch_request(q); } } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 66f834571b8..6d4ac76c280 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -299,13 +299,13 @@ static void do_blkif_request(struct request_queue *rq) queued = 0; - while ((req = elv_next_request(rq)) != NULL) { + while ((req = blk_peek_request(rq)) != NULL) { info = req->rq_disk->private_data; if (RING_FULL(&info->ring)) goto wait; - blkdev_dequeue_request(req); + blk_start_request(req); if (!blk_fs_request(req)) { __blk_end_request_all(req, -EIO); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index edf137b6c37..3a4397edab7 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -463,10 +463,10 @@ struct request *ace_get_next_request(struct request_queue * q) { struct request *req; - while ((req = elv_next_request(q)) != NULL) { + while ((req = blk_peek_request(q)) != NULL) { if (blk_fs_request(req)) break; - blkdev_dequeue_request(req); + blk_start_request(req); __blk_end_request_all(req, -EIO); } return req; @@ -498,10 +498,8 @@ static void ace_fsm_dostate(struct ace_device *ace) __blk_end_request_all(ace->req, -EIO); ace->req = NULL; } - while ((req = elv_next_request(ace->queue)) != NULL) { - blkdev_dequeue_request(req); + while ((req = blk_fetch_request(ace->queue)) != NULL) __blk_end_request_all(req, -EIO); - } /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; @@ -649,7 +647,7 @@ static void ace_fsm_dostate(struct ace_device *ace) ace->fsm_state = ACE_FSM_STATE_IDLE; break; } - blkdev_dequeue_request(req); + blk_start_request(req); /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index c909c1a3f65..4575171e5be 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -71,10 +71,7 @@ static void do_z2_request(struct request_queue *q) { struct request *req; - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - + req = blk_fetch_request(q); while (req) { unsigned long start = blk_rq_pos(req) << 9; unsigned long len = blk_rq_cur_bytes(req); @@ -100,11 +97,8 @@ static void do_z2_request(struct request_queue *q) len -= size; } done: - if (!__blk_end_request_cur(req, err)) { - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - } + if (!__blk_end_request_cur(req, err)) + req = blk_fetch_request(q); } } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 3cc02bfe828..1e366ad8f68 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -642,9 +642,7 @@ static void gdrom_request(struct request_queue *rq) { struct request *req; - while ((req = elv_next_request(rq)) != NULL) { - blkdev_dequeue_request(req); - + while ((req = blk_fetch_request(rq)) != NULL) { if (!blk_fs_request(req)) { printk(KERN_DEBUG "GDROM: Non-fs request ignored\n"); __blk_end_request_all(req, -EIO); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index bbe9f086734..ca741c21e4a 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -297,9 +297,7 @@ static void do_viocd_request(struct request_queue *q) { struct request *req; - while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) { - blkdev_dequeue_request(req); - + while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) { if (!blk_fs_request(req)) __blk_end_request_all(req, -EIO); else if (send_request(req) < 0) { diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2874c3d703a..8a894fa37b5 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -269,7 +269,7 @@ void ide_retry_pc(ide_drive_t *drive) blk_requeue_request(failed_rq->q, failed_rq); drive->hwif->rq = NULL; if (ide_queue_sense_rq(drive, pc)) { - blkdev_dequeue_request(failed_rq); + blk_start_request(failed_rq); ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq)); } } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index abda7337b3f..e4e3a0e3201 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -519,11 +519,8 @@ repeat: * we know that the queue isn't empty, but this can happen * if the q->prep_rq_fn() decides to kill a request */ - if (!rq) { - rq = elv_next_request(drive->queue); - if (rq) - blkdev_dequeue_request(rq); - } + if (!rq) + rq = blk_fetch_request(drive->queue); spin_unlock_irq(q->queue_lock); spin_lock_irq(&hwif->lock); @@ -536,7 +533,7 @@ repeat: /* * Sanity: don't accept a request that isn't a PM request * if we are currently power managed. This is very important as - * blk_stop_queue() doesn't prevent the elv_next_request() + * blk_stop_queue() doesn't prevent the blk_fetch_request() * above to return us whatever is in the queue. Since we call * ide_do_request() ourselves, we end up taking requests while * the queue is blocked... diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 58f5be8cd69..c0bebc6a2f2 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -704,13 +704,12 @@ try_again: return 0; } - dev_dbg(&card->dev, "elv_next\n"); - msb->block_req = elv_next_request(msb->queue); + dev_dbg(&card->dev, "blk_fetch\n"); + msb->block_req = blk_fetch_request(msb->queue); if (!msb->block_req) { dev_dbg(&card->dev, "issue end\n"); return -EAGAIN; } - blkdev_dequeue_request(msb->block_req); dev_dbg(&card->dev, "trying again\n"); chunk = 1; @@ -825,10 +824,8 @@ static void mspro_block_submit_req(struct request_queue *q) return; if (msb->eject) { - while ((req = elv_next_request(q)) != NULL) { - blkdev_dequeue_request(req); + while ((req = blk_fetch_request(q)) != NULL) __blk_end_request_all(req, -ENODEV); - } return; } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 8b5cbfc3ba9..6573ef4408f 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -877,7 +877,7 @@ static void i2o_block_request_fn(struct request_queue *q) struct request *req; while (!blk_queue_plugged(q)) { - req = elv_next_request(q); + req = blk_peek_request(q); if (!req) break; @@ -890,7 +890,7 @@ static void i2o_block_request_fn(struct request_queue *q) if (queue_depth < I2O_BLOCK_MAX_OPEN_REQUESTS) { if (!i2o_block_transfer(req)) { - blkdev_dequeue_request(req); + blk_start_request(req); continue; } else osm_info("transfer error\n"); @@ -917,7 +917,7 @@ static void i2o_block_request_fn(struct request_queue *q) break; } } else { - blkdev_dequeue_request(req); + blk_start_request(req); __blk_end_request_all(req, -EIO); } } diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 4b70f1e2834..49e582356c6 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -54,11 +54,8 @@ static int mmc_queue_thread(void *d) spin_lock_irq(q->queue_lock); set_current_state(TASK_INTERRUPTIBLE); - if (!blk_queue_plugged(q)) { - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - } + if (!blk_queue_plugged(q)) + req = blk_fetch_request(q); mq->req = req; spin_unlock_irq(q->queue_lock); @@ -94,10 +91,8 @@ static void mmc_request(struct request_queue *q) if (!mq) { printk(KERN_ERR "MMC: killing requests for dead queue\n"); - while ((req = elv_next_request(q)) != NULL) { - blkdev_dequeue_request(req); + while ((req = blk_fetch_request(q)) != NULL) __blk_end_request_all(req, -EIO); - } return; } diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 3e10442615d..502622f628b 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -100,12 +100,7 @@ static int mtd_blktrans_thread(void *arg) struct mtd_blktrans_dev *dev; int res; - if (!req) { - req = elv_next_request(rq); - if (req) - blkdev_dequeue_request(req); - } - if (!req) { + if (!req && !(req = blk_fetch_request(rq))) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(rq->queue_lock); schedule(); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 7df03c7aea0..e64f62d5e0f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1656,17 +1656,13 @@ static void __dasd_process_request_queue(struct dasd_block *block) if (basedev->state < DASD_STATE_READY) return; /* Now we try to fetch requests from the request queue */ - while (!blk_queue_plugged(queue) && - elv_next_request(queue)) { - - req = elv_next_request(queue); - + while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { if (basedev->features & DASD_FEATURE_READONLY && rq_data_dir(req) == WRITE) { DBF_DEV_EVENT(DBF_ERR, basedev, "Rejecting write request %p", req); - blkdev_dequeue_request(req); + blk_start_request(req); __blk_end_request_all(req, -EIO); continue; } @@ -1695,7 +1691,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "CCW creation failed (rc=%ld) " "on request %p", PTR_ERR(cqr), req); - blkdev_dequeue_request(req); + blk_start_request(req); __blk_end_request_all(req, -EIO); continue; } @@ -1705,7 +1701,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) */ cqr->callback_data = (void *) req; cqr->status = DASD_CQR_FILLED; - blkdev_dequeue_request(req); + blk_start_request(req); list_add_tail(&cqr->blocklist, &block->ccw_queue); dasd_profile_start(block, cqr, req); } @@ -2029,10 +2025,8 @@ static void dasd_flush_request_queue(struct dasd_block *block) return; spin_lock_irq(&block->request_queue_lock); - while ((req = elv_next_request(block->request_queue))) { - blkdev_dequeue_request(req); + while ((req = blk_fetch_request(block->request_queue))) __blk_end_request_all(req, -EIO); - } spin_unlock_irq(&block->request_queue_lock); } diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 5d035e4939d..1e796767598 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -93,7 +93,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data) device->blk_data.block_position = -1; device->discipline->free_bread(ccw_req); if (!list_empty(&device->req_queue) || - elv_next_request(device->blk_data.request_queue)) + blk_peek_request(device->blk_data.request_queue)) tapeblock_trigger_requeue(device); } @@ -162,19 +162,16 @@ tapeblock_requeue(struct work_struct *work) { spin_lock_irq(&device->blk_data.request_queue_lock); while ( !blk_queue_plugged(queue) && - elv_next_request(queue) && + (req = blk_fetch_request(queue)) && nr_queued < TAPEBLOCK_MIN_REQUEUE ) { - req = elv_next_request(queue); if (rq_data_dir(req) == WRITE) { DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); - blkdev_dequeue_request(req); spin_unlock_irq(&device->blk_data.request_queue_lock); blk_end_request_all(req, -EIO); spin_lock_irq(&device->blk_data.request_queue_lock); continue; } - blkdev_dequeue_request(req); nr_queued++; spin_unlock_irq(&device->blk_data.request_queue_lock); rc = tapeblock_start_request(device, req); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index f572a4a1d14..6d465168468 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -186,10 +186,7 @@ static void jsfd_do_request(struct request_queue *q) { struct request *req; - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - + req = blk_fetch_request(q); while (req) { struct jsfd_part *jdp = req->rq_disk->private_data; unsigned long offset = blk_rq_pos(req) << 9; @@ -212,11 +209,8 @@ static void jsfd_do_request(struct request_queue *q) jsfd_read(req->buffer, jdp->dbase + offset, len); err = 0; end: - if (!__blk_end_request_cur(req, err)) { - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - } + if (!__blk_end_request_cur(req, err)) + req = blk_fetch_request(q); } } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ee308f6f798..b12750f8216 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1207,7 +1207,7 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret) break; case BLKPREP_DEFER: /* - * If we defer, the elv_next_request() returns NULL, but the + * If we defer, the blk_peek_request() returns NULL, but the * queue must be restarted, so we plug here if no returning * command will automatically do that. */ @@ -1385,7 +1385,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) struct scsi_target *starget = scsi_target(sdev); struct Scsi_Host *shost = sdev->host; - blkdev_dequeue_request(req); + blk_start_request(req); if (unlikely(cmd == NULL)) { printk(KERN_CRIT "impossible request in %s.\n", @@ -1477,7 +1477,7 @@ static void scsi_request_fn(struct request_queue *q) if (!sdev) { printk("scsi: killing requests for dead queue\n"); - while ((req = elv_next_request(q)) != NULL) + while ((req = blk_peek_request(q)) != NULL) scsi_kill_request(req, q); return; } @@ -1498,7 +1498,7 @@ static void scsi_request_fn(struct request_queue *q) * that the request is fully prepared even if we cannot * accept it. */ - req = elv_next_request(q); + req = blk_peek_request(q); if (!req || !scsi_dev_queue_ready(q, sdev)) break; @@ -1514,7 +1514,7 @@ static void scsi_request_fn(struct request_queue *q) * Remove the request from the request list. */ if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req))) - blkdev_dequeue_request(req); + blk_start_request(req); sdev->device_busy++; spin_unlock(q->queue_lock); diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 50988cbf7b2..d606452297c 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -163,12 +163,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); while (!blk_queue_plugged(q)) { - req = elv_next_request(q); + req = blk_fetch_request(q); if (!req) break; - blkdev_dequeue_request(req); - spin_unlock_irq(q->queue_lock); handler = to_sas_internal(shost->transportt)->f->smp_handler; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c7558034570..6e59d3b92ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -818,8 +818,6 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } -extern void blkdev_dequeue_request(struct request *req); - /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request @@ -852,6 +850,13 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +/* + * Request issue related functions. + */ +extern struct request *blk_peek_request(struct request_queue *q); +extern void blk_start_request(struct request *rq); +extern struct request *blk_fetch_request(struct request_queue *q); + /* * Request completion related functions. * diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4e462878c9c..1cb3372e65d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -103,10 +103,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); -extern void elv_dequeue_request(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern int elv_queue_empty(struct request_queue *); -extern struct request *elv_next_request(struct request_queue *q); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); -- cgit v1.2.3-70-g09d2 From 1822952ba2b9f22f79019d07ebbeca31dc14b718 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 11 May 2009 17:56:07 +0900 Subject: block: let blk_end_request_all handle bidi requests blk_end_request_all() and __blk_end_request_all() should finish all bytes including bidi, by definition. That's what all bidi users need , bidi requests must be complete as a whole (partial completion is impossible). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6e59d3b92ff..1069f4483c6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,8 +910,12 @@ static inline bool blk_end_request(struct request *rq, int error, static inline void blk_end_request_all(struct request *rq, int error) { bool pending; + unsigned int bidi_bytes = 0; - pending = blk_end_request(rq, error, blk_rq_bytes(rq)); + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); + + pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } @@ -962,8 +966,12 @@ static inline bool __blk_end_request(struct request *rq, int error, static inline void __blk_end_request_all(struct request *rq, int error) { bool pending; + unsigned int bidi_bytes = 0; + + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); - pending = __blk_end_request(rq, error, blk_rq_bytes(rq)); + pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } -- cgit v1.2.3-70-g09d2 From b1f744937f1be3e6d3009382a755679133cf782d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 11 May 2009 17:56:09 +0900 Subject: block: move completion related functions back to blk-core.c Let's put the completion related functions back to block/blk-core.c where they have lived. We can also unexport blk_end_bidi_request() and __blk_end_bidi_request(), which nobody uses. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-core.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/blkdev.h | 128 ++++--------------------------------------------- 2 files changed, 130 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 93691d2ac5a..a2d97de1a12 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2026,8 +2026,8 @@ static void blk_finish_request(struct request *req, int error) * %false - we are done with this request * %true - still buffers pending for this request **/ -bool blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) +static bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { struct request_queue *q = rq->q; unsigned long flags; @@ -2041,7 +2041,6 @@ bool blk_end_bidi_request(struct request *rq, int error, return false; } -EXPORT_SYMBOL_GPL(blk_end_bidi_request); /** * __blk_end_bidi_request - Complete a bidi request with queue lock held @@ -2058,8 +2057,8 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request); * %false - we are done with this request * %true - still buffers pending for this request **/ -bool __blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) +static bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) return true; @@ -2068,7 +2067,124 @@ bool __blk_end_bidi_request(struct request *rq, int error, return false; } -EXPORT_SYMBOL_GPL(__blk_end_bidi_request); + +/** + * blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Ends I/O on a number of bytes attached to @rq. + * If @rq has leftover, sets it up for the next range of segments. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +{ + return blk_end_bidi_request(rq, error, nr_bytes, 0); +} +EXPORT_SYMBOL_GPL(blk_end_request); + +/** + * blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. + */ +void blk_end_request_all(struct request *rq, int error) +{ + bool pending; + unsigned int bidi_bytes = 0; + + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); + + pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); + BUG_ON(pending); +} +EXPORT_SYMBOL_GPL(blk_end_request_all); + +/** + * blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error + * + * Description: + * Complete the current consecutively mapped chunk from @rq. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool blk_end_request_cur(struct request *rq, int error) +{ + return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); +} +EXPORT_SYMBOL_GPL(blk_end_request_cur); + +/** + * __blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Must be called with queue lock held unlike blk_end_request(). + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +{ + return __blk_end_bidi_request(rq, error, nr_bytes, 0); +} +EXPORT_SYMBOL_GPL(__blk_end_request); + +/** + * __blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. Must be called with queue lock held. + */ +void __blk_end_request_all(struct request *rq, int error) +{ + bool pending; + unsigned int bidi_bytes = 0; + + if (unlikely(blk_bidi_rq(rq))) + bidi_bytes = blk_rq_bytes(rq->next_rq); + + pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); + BUG_ON(pending); +} +EXPORT_SYMBOL_GPL(__blk_end_request_all); + +/** + * __blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error + * + * Description: + * Complete the current consecutively mapped chunk from @rq. Must + * be called with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool __blk_end_request_cur(struct request *rq, int error) +{ + return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); +} +EXPORT_SYMBOL_GPL(__blk_end_request_cur); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1069f4483c6..f9d60a78c08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -872,126 +872,14 @@ extern struct request *blk_fetch_request(struct request_queue *q); */ extern bool blk_update_request(struct request *rq, int error, unsigned int nr_bytes); -extern bool blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, - unsigned int bidi_bytes); -extern bool __blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, - unsigned int bidi_bytes); - -/** - * blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static inline bool blk_end_request(struct request *rq, int error, - unsigned int nr_bytes) -{ - return blk_end_bidi_request(rq, error, nr_bytes, 0); -} - -/** - * blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @err: %0 for success, < %0 for error - * - * Description: - * Completely finish @rq. - */ -static inline void blk_end_request_all(struct request *rq, int error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} - -/** - * blk_end_request_cur - Helper function to finish the current request chunk. - * @rq: the request to finish the current chunk for - * @err: %0 for success, < %0 for error - * - * Description: - * Complete the current consecutively mapped chunk from @rq. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - */ -static inline bool blk_end_request_cur(struct request *rq, int error) -{ - return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); -} - -/** - * __blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Must be called with queue lock held unlike blk_end_request(). - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static inline bool __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes) -{ - return __blk_end_bidi_request(rq, error, nr_bytes, 0); -} - -/** - * __blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @err: %0 for success, < %0 for error - * - * Description: - * Completely finish @rq. Must be called with queue lock held. - */ -static inline void __blk_end_request_all(struct request *rq, int error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} - -/** - * __blk_end_request_cur - Helper function to finish the current request chunk. - * @rq: the request to finish the current chunk for - * @err: %0 for success, < %0 for error - * - * Description: - * Complete the current consecutively mapped chunk from @rq. Must - * be called with queue lock held. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - */ -static inline bool __blk_end_request_cur(struct request *rq, int error) -{ - return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); -} +extern bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void blk_end_request_all(struct request *rq, int error); +extern bool blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void __blk_end_request_all(struct request *rq, int error); +extern bool __blk_end_request_cur(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); -- cgit v1.2.3-70-g09d2 From 79c5d3ce614d8fe706545c7bca2158b63db6bb5e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 11 May 2009 15:06:46 +0800 Subject: blktrace: from-sector redundant in trace_block_remap, cleanup The last argument of block_remap prober is the original sector before remap, so it should be 'from', not 'to'. [ Impact: clean up ] Signed-off-by: Li Zefan Cc: "Alan D. Brunelle" Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: KOSAKI Motohiro LKML-Reference: <4A07CE86.5090301@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/block.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/block.h b/include/trace/block.h index 8ac945b7746..5b12efa096b 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t to), - TP_ARGS(q, bio, dev, to)); + sector_t from), + TP_ARGS(q, bio, dev, from)); #endif -- cgit v1.2.3-70-g09d2 From 6818173bd658439b83896a2a7586f64ab51bf29c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 May 2009 15:37:36 +0200 Subject: splice: implement default splice_read method If f_op->splice_read() is not implemented, fall back to a plain read. Use vfs_readv() to read into previously allocated pages. This will allow splice and functions using splice, such as the loop device, to work on all filesystems. This includes "direct_io" files in fuse which bypass the page cache. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +---- fs/coda/file.c | 9 ++-- fs/pipe.c | 14 ++++++ fs/read_write.c | 7 +-- fs/splice.c | 120 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/fs.h | 2 + include/linux/pipe_fs_i.h | 1 + 7 files changed, 140 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9ca4bb01465..801f4ab8330 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -709,10 +709,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) goto out_putf; - /* new backing store needs to support loop (eg splice_read) */ - if (!inode->i_fop->splice_read) - goto out_putf; - /* size of the new backing store needs to be the same */ if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) goto out_putf; @@ -788,12 +784,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, error = -EINVAL; if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { const struct address_space_operations *aops = mapping->a_ops; - /* - * If we can't read - sorry. If we only can't write - well, - * it's going to be read-only. - */ - if (!file->f_op->splice_read) - goto out_putf; + if (aops->write_begin) lo_flags |= LO_FLAGS_USE_AOPS; if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) diff --git a/fs/coda/file.c b/fs/coda/file.c index 6a347fbc998..ffd42815fda 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); struct coda_file_info *cfi; struct file *host_file; @@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->splice_read) - return -EINVAL; + splice_read = host_file->f_op->splice_read; + if (!splice_read) + splice_read = default_file_splice_read; - return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags); + return splice_read(host_file, ppos, pipe, count, flags); } static ssize_t diff --git a/fs/pipe.c b/fs/pipe.c index 13414ec45b8..f7dd21ad85a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -302,6 +302,20 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, return 0; } +/** + * generic_pipe_buf_release - put a reference to a &struct pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to put a reference to + * + * Description: + * This function releases a reference to @buf. + */ +void generic_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + page_cache_release(buf->page); +} + static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = generic_pipe_buf_map, diff --git a/fs/read_write.c b/fs/read_write.c index 9d1e76bb9ee..6c8c55dec2b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -805,12 +805,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_path.dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->splice_read) - goto fput_in; retval = -ESPIPE; if (!ppos) ppos = &in_file->f_pos; @@ -834,6 +828,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, retval = -EINVAL; if (!out_file->f_op || !out_file->f_op->sendpage) goto fput_out; + in_inode = in_file->f_path.dentry->d_inode; out_inode = out_file->f_path.dentry->d_inode; retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); if (retval < 0) diff --git a/fs/splice.c b/fs/splice.c index e405cf552f5..3bd9cb21b38 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -507,9 +507,116 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, return ret; } - EXPORT_SYMBOL(generic_file_splice_read); +static const struct pipe_buf_operations default_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t kernel_readv(struct file *file, const struct iovec *vec, + unsigned long vlen, loff_t offset) +{ + mm_segment_t old_fs; + loff_t pos = offset; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); + set_fs(old_fs); + + return res; +} + +ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + unsigned int nr_pages; + unsigned int nr_freed; + size_t offset; + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct iovec vec[PIPE_BUFFERS]; + pgoff_t index; + ssize_t res; + size_t this_len; + int error; + int i; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &default_pipe_buf_ops, + .spd_release = spd_release_page, + }; + + index = *ppos >> PAGE_CACHE_SHIFT; + offset = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { + struct page *page; + + page = alloc_page(GFP_HIGHUSER); + error = -ENOMEM; + if (!page) + goto err; + + this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); + vec[i].iov_base = (void __user *) kmap(page); + vec[i].iov_len = this_len; + pages[i] = page; + spd.nr_pages++; + len -= this_len; + offset = 0; + } + + res = kernel_readv(in, vec, spd.nr_pages, *ppos); + if (res < 0) + goto err; + + error = 0; + if (!res) + goto err; + + nr_freed = 0; + for (i = 0; i < spd.nr_pages; i++) { + kunmap(pages[i]); + this_len = min_t(size_t, vec[i].iov_len, res); + partial[i].offset = 0; + partial[i].len = this_len; + if (!this_len) { + __free_page(pages[i]); + pages[i] = NULL; + nr_freed++; + } + res -= this_len; + } + spd.nr_pages -= nr_freed; + + res = splice_to_pipe(pipe, &spd); + if (res > 0) + *ppos += res; + + return res; + +err: + for (i = 0; i < spd.nr_pages; i++) { + kunmap(pages[i]); + __free_page(pages[i]); + } + return error; +} +EXPORT_SYMBOL(default_file_splice_read); + /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). Return the number of bytes sent. @@ -933,11 +1040,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); int ret; - if (unlikely(!in->f_op || !in->f_op->splice_read)) - return -EINVAL; - if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; @@ -945,7 +1051,11 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - return in->f_op->splice_read(in, ppos, pipe, len, flags); + splice_read = in->f_op->splice_read; + if (!splice_read) + splice_read = default_file_splice_read; + + return splice_read(in, ppos, pipe, len, flags); } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f435..d926c2bea16 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2204,6 +2204,8 @@ extern int generic_segment_checks(const struct iovec *iov, /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t default_file_splice_read(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index c8f038554e8..b43a9e03905 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -152,5 +152,6 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); #endif -- cgit v1.2.3-70-g09d2 From dc6382ced07d6bad61d0b591fb12ab5da7ca632c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 May 2009 22:09:37 +0300 Subject: nl80211 : Add support for configuring MFP NL80211_CMD_ASSOCIATE request must be able to indicate whether management frame protection (IEEE 802.11w) is being used. mac80211 was able to use MFP in client mode only with WEXT, but the new NL80211_ATTR_USE_MFP attribute will allow this to be done with nl80211, too. Since we are currently using nl80211 for MFP only with drivers that use user space SME, only MFP disabled and required values are used. However, the NL80211_ATTR_USE_MFP attribute is an enum that can be extended with MFP optional in the future, if that is needed with some drivers (e.g., if the RSN IE is generated by the driver). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 17 +++++++++++++++++ include/net/cfg80211.h | 2 ++ net/mac80211/cfg.c | 8 ++++++++ net/wireless/nl80211.c | 12 ++++++++++++ 4 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e9fd13aa79f..58c4ee1822d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -494,6 +494,11 @@ enum nl80211_commands { * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this * is used, e.g., with %NL80211_CMD_AUTHENTICATE event * + * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is + * used for the association (&enum nl80211_mfp, represented as a u32); + * this attribute can be used + * with %NL80211_CMD_ASSOCIATE request + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -596,6 +601,8 @@ enum nl80211_attrs { NL80211_ATTR_TIMED_OUT, + NL80211_ATTR_USE_MFP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1179,4 +1186,14 @@ enum nl80211_key_type { NL80211_KEYTYPE_PEERKEY, }; +/** + * enum nl80211_mfp - Management frame protection state + * @NL80211_MFP_NO: Management frame protection not used + * @NL80211_MFP_REQUIRED: Management frame protection required + */ +enum nl80211_mfp { + NL80211_MFP_NO, + NL80211_MFP_REQUIRED, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b8a76764e1c..47e30e1d91f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -672,6 +672,7 @@ struct cfg80211_auth_request { * @ssid_len: Length of ssid in octets * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets + * @use_mfp: Use management frame protection (IEEE 802.11w) in this association */ struct cfg80211_assoc_request { struct ieee80211_channel *chan; @@ -680,6 +681,7 @@ struct cfg80211_assoc_request { size_t ssid_len; const u8 *ie; size_t ie_len; + bool use_mfp; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d0ca6da33ca..4e627cf2b8c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1253,6 +1253,14 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, if (ret) return ret; + if (req->use_mfp) { + sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED; + sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED; + } else { + sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED; + sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; + } + sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME; sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE; ieee80211_sta_req_auth(sdata); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3c53c5cbc3a..79927706937 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -122,6 +122,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, [NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG }, + [NL80211_ATTR_USE_MFP] = { .type = NLA_U32 }, }; /* IE validation */ @@ -3012,6 +3013,17 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + enum nl80211_mfp use_mfp = + nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (use_mfp == NL80211_MFP_REQUIRED) + req.use_mfp = true; + else if (use_mfp != NL80211_MFP_NO) { + err = -EINVAL; + goto out; + } + } + err = drv->ops->assoc(&drv->wiphy, dev, &req); out: -- cgit v1.2.3-70-g09d2 From 9ed6bcce77f75d98af6ee07069deac6041948bee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 May 2009 20:47:39 +0200 Subject: mac80211: move HT operation mode BSS info There really is no need to have a separate struct for a single variable. The fact that it exists is due to the code legacy, but we can remove that now. Very simple. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 4 ++-- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/mwl8k.c | 2 +- include/net/mac80211.h | 12 +++--------- net/mac80211/mlme.c | 11 ++++------- 5 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 77f4b43b9b7..a9a4fcef7e4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2225,9 +2225,9 @@ static void iwl_ht_conf(struct iwl_priv *priv, iwl_conf->tx_chan_width = iwl_conf->supported_chan_width != 0; iwl_conf->ht_protection = - bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION; + bss_conf->ht_operation_mode & IEEE80211_HT_OP_MODE_PROTECTION; iwl_conf->non_GF_STA_present = - !!(bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); + !!(bss_conf->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); rcu_read_unlock(); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index b1213b6a6b9..61a4ad7cc1c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -642,7 +642,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_HT) { printk(KERN_DEBUG " %s: HT: op_mode=0x%x\n", wiphy_name(hw->wiphy), - info->ht.operation_mode); + info->ht_operation_mode); } if (changed & BSS_CHANGED_BASIC_RATES) { diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 46b288dc8f4..a263d5c84c0 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2369,7 +2369,7 @@ static int mwl8k_cmd_set_aid(struct ieee80211_hw *hw, if (info->use_cts_prot) { prot_mode = MWL8K_FRAME_PROT_11G; } else { - switch (info->ht.operation_mode & + switch (info->ht_operation_mode & IEEE80211_HT_OP_MODE_PROTECTION) { case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: prot_mode = MWL8K_FRAME_PROT_11N_HT_40MHZ_ONLY; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 38dc1cd1027..03591fcf519 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -170,14 +170,6 @@ enum ieee80211_bss_change { BSS_CHANGED_BEACON_ENABLED = 1<<9, }; -/** - * struct ieee80211_bss_ht_conf - BSS's changing HT configuration - * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info) - */ -struct ieee80211_bss_ht_conf { - u16 operation_mode; -}; - /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -203,6 +195,8 @@ struct ieee80211_bss_ht_conf { * the current band. * @bssid: The BSSID for this BSS * @enable_beacon: whether beaconing should be enabled or not + * @ht_operation_mode: HT operation mode (like in &struct ieee80211_ht_info). + * This field is only valid when the channel type is one of the HT types. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -219,7 +213,7 @@ struct ieee80211_bss_conf { u16 assoc_capability; u64 timestamp; u32 basic_rates; - struct ieee80211_bss_ht_conf ht; + u16 ht_operation_mode; }; /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c5445bae9d6..a1f2332a6fe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -95,16 +95,14 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_bss_ht_conf ht; struct sta_info *sta; u32 changed = 0; + u16 ht_opmode; bool enable_ht = true, ht_changed; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - memset(&ht, 0, sizeof(ht)); - /* HT is not supported */ if (!sband->ht_cap.ht_supported) enable_ht = false; @@ -148,19 +146,18 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, IEEE80211_RC_HT_CHANGED); rcu_read_unlock(); - } /* disable HT */ if (!enable_ht) return 0; - ht.operation_mode = le16_to_cpu(hti->operation_mode); + ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ - if (memcmp(&sdata->vif.bss_conf.ht, &ht, sizeof(ht))) { + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { changed |= BSS_CHANGED_HT; - sdata->vif.bss_conf.ht = ht; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } return changed; -- cgit v1.2.3-70-g09d2 From 44033f80cefd1d7b474efdabc412476d4bafb8f4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 May 2009 21:21:41 +0200 Subject: mac80211: remove ieee80211_ht_bss_info This struct is no longer used (and hasn't been for a while). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 03591fcf519..d10ed1776fc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -72,22 +72,6 @@ * not do so then mac80211 may add this under certain circumstances. */ -/** - * struct ieee80211_ht_bss_info - describing BSS's HT characteristics - * - * This structure describes most essential parameters needed - * to describe 802.11n HT characteristics in a BSS. - * - * @primary_channel: channel number of primery channel - * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width) - * @bss_op_mode: 802.11n's BSS operation modes (e.g. HT protection) - */ -struct ieee80211_ht_bss_info { - u8 primary_channel; - u8 bss_cap; /* use IEEE80211_HT_IE_CHA_ */ - u8 bss_op_mode; /* use IEEE80211_HT_IE_ */ -}; - /** * enum ieee80211_max_queues - maximum number of queues * -- cgit v1.2.3-70-g09d2 From 4e943900fb9675d3a5ebdabc2cd4a9a54edace97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 9 May 2009 20:06:47 +0200 Subject: cfg80211: constify key mac address in ops The address pointed to by mac_addr can be marked as const. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 6 +++--- net/mac80211/cfg.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 47e30e1d91f..35afe973d46 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -860,13 +860,13 @@ struct cfg80211_ops { struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, u8 *mac_addr, + u8 key_index, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, u8 *mac_addr, void *cookie, + u8 key_index, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, u8 *mac_addr); + u8 key_index, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4e627cf2b8c..be86e159e6e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -112,7 +112,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, u8 *mac_addr, + u8 key_idx, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata; @@ -166,7 +166,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, u8 *mac_addr) + u8 key_idx, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; @@ -208,7 +208,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, u8 *mac_addr, void *cookie, + u8 key_idx, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { -- cgit v1.2.3-70-g09d2 From cbe8fa9c5e88fd5f554ebc519af3b0ed17dde0dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 9 May 2009 20:09:03 +0200 Subject: cfg80211: put wext data into substructure To make it more apparent in the code what is for wext only (and needs to be #ifdef'ed) put all the info for wext into a substruct in each wireless_dev. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 6 +++-- net/wireless/core.c | 4 ++-- net/wireless/ibss.c | 60 +++++++++++++++++++++++++------------------------- 3 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 35afe973d46..4c748935ce5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1147,8 +1147,10 @@ struct wireless_dev { #ifdef CONFIG_WIRELESS_EXT /* wext data */ - struct cfg80211_ibss_params wext; - u8 wext_bssid[ETH_ALEN]; + struct { + struct cfg80211_ibss_params ibss; + u8 bssid[ETH_ALEN]; + } wext; #endif }; diff --git a/net/wireless/core.c b/net/wireless/core.c index 2006a4ee60e..15b2a179480 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -470,9 +470,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #ifdef CONFIG_WIRELESS_EXT if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) break; - if (!dev->ieee80211_ptr->wext.ssid_len) + if (!dev->ieee80211_ptr->wext.ibss.ssid_len) break; - cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext); + cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext.ibss); break; #endif case NETDEV_UNREGISTER: diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 3c38afaed28..a4a1c3498ff 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -63,7 +63,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return -EALREADY; #ifdef CONFIG_WIRELESS_EXT - wdev->wext.channel = params->channel; + wdev->wext.ibss.channel = params->channel; #endif err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); @@ -90,7 +90,7 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) memset(wdev->bssid, 0, ETH_ALEN); #ifdef CONFIG_WIRELESS_EXT if (!nowext) - wdev->wext.ssid_len = 0; + wdev->wext.ibss.ssid_len = 0; #endif } @@ -116,11 +116,11 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, enum ieee80211_band band; int i; - if (!wdev->wext.beacon_interval) - wdev->wext.beacon_interval = 100; + if (!wdev->wext.ibss.beacon_interval) + wdev->wext.ibss.beacon_interval = 100; /* try to find an IBSS channel if none requested ... */ - if (!wdev->wext.channel) { + if (!wdev->wext.ibss.channel) { for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; @@ -135,27 +135,27 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; - wdev->wext.channel = chan; + wdev->wext.ibss.channel = chan; break; } - if (wdev->wext.channel) + if (wdev->wext.ibss.channel) break; } - if (!wdev->wext.channel) + if (!wdev->wext.ibss.channel) return -EINVAL; } /* don't join -- SSID is not there */ - if (!wdev->wext.ssid_len) + if (!wdev->wext.ibss.ssid_len) return 0; if (!netif_running(wdev->netdev)) return 0; return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy), - wdev->netdev, &wdev->wext); + wdev->netdev, &wdev->wext.ibss); } int cfg80211_ibss_wext_siwfreq(struct net_device *dev, @@ -182,7 +182,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, chan->flags & IEEE80211_CHAN_DISABLED)) return -EINVAL; - if (wdev->wext.channel == chan) + if (wdev->wext.ibss.channel == chan) return 0; if (wdev->ssid_len) { @@ -193,11 +193,11 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, } if (chan) { - wdev->wext.channel = chan; - wdev->wext.channel_fixed = true; + wdev->wext.ibss.channel = chan; + wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ - wdev->wext.channel_fixed = false; + wdev->wext.ibss.channel_fixed = false; } return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); @@ -218,8 +218,8 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, if (wdev->current_bss) chan = wdev->current_bss->channel; - else if (wdev->wext.channel) - chan = wdev->wext.channel; + else if (wdev->wext.ibss.channel) + chan = wdev->wext.ibss.channel; if (chan) { freq->m = chan->center_freq; @@ -259,9 +259,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - wdev->wext.ssid = wdev->ssid; - memcpy(wdev->wext.ssid, ssid, len); - wdev->wext.ssid_len = len; + wdev->wext.ibss.ssid = wdev->ssid; + memcpy(wdev->wext.ibss.ssid, ssid, len); + wdev->wext.ibss.ssid_len = len; return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); } @@ -284,10 +284,10 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 1; data->length = wdev->ssid_len; memcpy(ssid, wdev->ssid, data->length); - } else if (wdev->wext.ssid && wdev->wext.ssid_len) { + } else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) { data->flags = 1; - data->length = wdev->wext.ssid_len; - memcpy(ssid, wdev->wext.ssid, data->length); + data->length = wdev->wext.ibss.ssid_len; + memcpy(ssid, wdev->wext.ibss.ssid, data->length); } return 0; @@ -318,12 +318,12 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, bssid = NULL; /* both automatic */ - if (!bssid && !wdev->wext.bssid) + if (!bssid && !wdev->wext.ibss.bssid) return 0; /* fixed already - and no change */ - if (wdev->wext.bssid && bssid && - compare_ether_addr(bssid, wdev->wext.bssid) == 0) + if (wdev->wext.ibss.bssid && bssid && + compare_ether_addr(bssid, wdev->wext.ibss.bssid) == 0) return 0; if (wdev->ssid_len) { @@ -334,10 +334,10 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } if (bssid) { - memcpy(wdev->wext_bssid, bssid, ETH_ALEN); - wdev->wext.bssid = wdev->wext_bssid; + memcpy(wdev->wext.bssid, bssid, ETH_ALEN); + wdev->wext.ibss.bssid = wdev->wext.bssid; } else - wdev->wext.bssid = NULL; + wdev->wext.ibss.bssid = NULL; return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); } @@ -356,8 +356,8 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - if (wdev->wext.bssid) { - memcpy(ap_addr->sa_data, wdev->wext.bssid, ETH_ALEN); + if (wdev->wext.ibss.bssid) { + memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); return 0; } -- cgit v1.2.3-70-g09d2 From 528769cf1e422d932052be1487459262f3d75333 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 11 May 2009 10:20:35 +0300 Subject: mac80211: Robust Action frame categories for MFP IEEE 802.11w/D9.0 introduces a mechanism for Action field Category values to be used to select which Action frames are Robust. Public and Vendor-specific categories are marked as not Robust in IEEE 802.11w; HT will be marked not Robust in IEEE 802.11n. A new Vendor-specific Protected category is allocated for Robust vendor-specific Action frames. Another new category, Protected Dual of Action, is introduced for protecting some existing Public Action frames (e.g., IEEE 802.11y protected enablement). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dc92359f37e..05c29c01174 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1068,8 +1068,12 @@ enum ieee80211_category { WLAN_CATEGORY_DLS = 2, WLAN_CATEGORY_BACK = 3, WLAN_CATEGORY_PUBLIC = 4, + WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, + WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, WLAN_CATEGORY_WMM = 17, + WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, + WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; /* SPECTRUM_MGMT action code */ @@ -1261,7 +1265,9 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) if (ieee80211_has_protected(hdr->frame_control)) return true; category = ((u8 *) hdr) + 24; - return *category != WLAN_CATEGORY_PUBLIC; + return *category != WLAN_CATEGORY_PUBLIC && + *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } return false; -- cgit v1.2.3-70-g09d2 From e13cf6e04ebc231653e03ebde4799dc55bf62849 Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 11 May 2009 18:13:13 -0700 Subject: ntp: fix comment typos Bernhard Schiffner noticed I had a few comment typos in this patch, (note: to save embarrassment, when making typos, avoid copying and pasting them) so this patch corrects them. [ Impact: cleanup ] Reported-by: Bernhard Schiffner Signed-off-by: John Stultz Cc: riel@redhat.com Cc: akpm@linux-foundation.org LKML-Reference: <1242090794.7214.131.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- include/linux/timex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 0daf9611ef4..9910e3bd5b3 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -179,7 +179,7 @@ struct timex { * NTP convergence time. A higher value makes it stiffer, increasing * convergence time, but making the clock more stable. * - * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 4. + * In David Mills' nanokernel reference implementation SHIFT_PLL is 4. * However this seems to increase convergence time much too long. * * https://lists.ntp.org/pipermail/hackers/2008-January/003487.html @@ -196,7 +196,7 @@ struct timex { * * SHIFT_FLL is used as a dampening factor to define how much we * adjust the frequency correction for a given offset in FLL mode. - * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 2. + * In David Mills' nanokernel reference implementation SHIFT_FLL is 2. * * MAXTC establishes the maximum time constant of the PLL. */ -- cgit v1.2.3-70-g09d2 From 2b1ccc0ee918a653d0483fdad9dd6112ce8e9043 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 May 2009 11:11:48 +0200 Subject: splice: fix misleading comment Splice is tied to pipes by design, it'll not change. And now that the splice stuff is in splice.h (and note pipe.h), the rest of the comment is out-of-date as well. Signed-off-by: Jens Axboe --- include/linux/splice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/splice.h b/include/linux/splice.h index 5f3faa9d15a..18e7c7c0cae 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -11,8 +11,7 @@ #include /* - * splice is tied to pipes as a transport (at least for now), so we'll just - * add the splice flags here. + * Flags passed in from splice/tee/vmsplice */ #define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ -- cgit v1.2.3-70-g09d2 From 597d0275736dad9c3bda6f0a00a1c477dc0f37b1 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Thu, 16 Apr 2009 12:13:26 +0530 Subject: timers: Framework for identifying pinned timers * Arun R Bharadwaj [2009-04-16 12:11:36]: This patch creates a new framework for identifying cpu-pinned timers and hrtimers. This framework is needed because pinned timers are expected to fire on the same CPU on which they are queued. So it is essential to identify these and not migrate them, in case there are any. For regular timers, the currently existing add_timer_on() can be used queue pinned timers and subsequently mod_timer_pinned() can be used to modify the 'expires' field. For hrtimers, new modes HRTIMER_ABS_PINNED and HRTIMER_REL_PINNED are added to queue cpu-pinned hrtimer. [ tglx: use .._PINNED mode argument instead of creating tons of new functions ] Signed-off-by: Arun R Bharadwaj Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 7 +++++-- include/linux/timer.h | 3 +++ kernel/hrtimer.c | 7 ++++--- kernel/timer.c | 31 +++++++++++++++++++++++++++---- 4 files changed, 39 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0d2f7c8a33d..7400900de94 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -30,8 +30,11 @@ struct hrtimer_cpu_base; * Mode arguments of xxx_hrtimer functions: */ enum hrtimer_mode { - HRTIMER_MODE_ABS, /* Time value is absolute */ - HRTIMER_MODE_REL, /* Time value is relative to now */ + HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ + HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ + HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ + HRTIMER_MODE_ABS_PINNED = 0x02, + HRTIMER_MODE_REL_PINNED = 0x03, }; /* diff --git a/include/linux/timer.h b/include/linux/timer.h index 6cdb6f3331f..ccf882eed8f 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -163,7 +163,10 @@ extern void add_timer_on(struct timer_list *timer, int cpu); extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); +extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); +#define TIMER_NOT_PINNED 0 +#define TIMER_PINNED 1 /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cb8a15c1958..c71bcd54924 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -193,7 +193,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, * Switch the timer base to the current CPU when possible. */ static inline struct hrtimer_clock_base * -switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) +switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, + int pinned) { struct hrtimer_clock_base *new_base; struct hrtimer_cpu_base *new_cpu_base; @@ -907,9 +908,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, ret = remove_hrtimer(timer, base); /* Switch the timer base, if necessary: */ - new_base = switch_hrtimer_base(timer, base); + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - if (mode == HRTIMER_MODE_REL) { + if (mode & HRTIMER_MODE_REL) { tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures diff --git a/kernel/timer.c b/kernel/timer.c index 5c1e84beaf4..3424dfd11d5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -604,7 +604,8 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, } static inline int -__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) +__mod_timer(struct timer_list *timer, unsigned long expires, + bool pending_only, int pinned) { struct tvec_base *base, *new_base; unsigned long flags; @@ -668,7 +669,7 @@ out_unlock: */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, true); + return __mod_timer(timer, expires, true, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer_pending); @@ -702,10 +703,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer->expires == expires && timer_pending(timer)) return 1; - return __mod_timer(timer, expires, false); + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); +/** + * mod_timer_pinned - modify a timer's timeout + * @timer: the timer to be modified + * @expires: new timeout in jiffies + * + * mod_timer_pinned() is a way to update the expire field of an + * active timer (if the timer is inactive it will be activated) + * and not allow the timer to be migrated to a different CPU. + * + * mod_timer_pinned(timer, expires) is equivalent to: + * + * del_timer(timer); timer->expires = expires; add_timer(timer); + */ +int mod_timer_pinned(struct timer_list *timer, unsigned long expires) +{ + if (timer->expires == expires && timer_pending(timer)) + return 1; + + return __mod_timer(timer, expires, false, TIMER_PINNED); +} +EXPORT_SYMBOL(mod_timer_pinned); + /** * add_timer - start a timer * @timer: the timer to be added @@ -1356,7 +1379,7 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire, false); + __mod_timer(&timer, expire, false, TIMER_NOT_PINNED); schedule(); del_singleshot_timer_sync(&timer); -- cgit v1.2.3-70-g09d2 From cd1bb94b4a0531e8211a3774f17de831f8285f76 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Thu, 16 Apr 2009 12:15:34 +0530 Subject: timers: /proc/sys sysctl hook to enable timer migration * Arun R Bharadwaj [2009-04-16 12:11:36]: This patch creates the /proc/sys sysctl interface at /proc/sys/kernel/timer_migration Timer migration is enabled by default. To disable timer migration, when CONFIG_SCHED_DEBUG = y, echo 0 > /proc/sys/kernel/timer_migration Signed-off-by: Arun R Bharadwaj Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 + kernel/sched.c | 2 ++ kernel/sysctl.c | 8 ++++++++ 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..61850401040 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1766,6 +1766,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index 9c5b4d3f97a..7f1dd56af86 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8731,6 +8731,8 @@ void __init sched_init_smp(void) } #endif /* CONFIG_SMP */ +const_debug unsigned int sysctl_timer_migration = 1; + int in_sched_functions(unsigned long addr) { return in_lock_functions(addr) || diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e3d2c7dd59b..b3ce5813730 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -324,6 +324,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "timer_migration", + .data = &sysctl_timer_migration, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = CTL_UNNUMBERED, -- cgit v1.2.3-70-g09d2 From eea08f32adb3f97553d49a4f79a119833036000a Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Thu, 16 Apr 2009 12:16:41 +0530 Subject: timers: Logic to move non pinned timers * Arun R Bharadwaj [2009-04-16 12:11:36]: This patch migrates all non pinned timers and hrtimers to the current idle load balancer, from all the idle CPUs. Timers firing on busy CPUs are not migrated. While migrating hrtimers, care should be taken to check if migrating a hrtimer would result in a latency or not. So we compare the expiry of the hrtimer with the next timer interrupt on the target cpu and migrate the hrtimer only if it expires *after* the next interrupt on the target cpu. So, added a clockevents_get_next_event() helper function to return the next_event on the target cpu's clock_event_device. [ tglx: cleanups and simplifications ] Signed-off-by: Arun R Bharadwaj Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 9 ++++++++ include/linux/sched.h | 12 +++++++++++ kernel/hrtimer.c | 51 ++++++++++++++++++++++++++++++++++++++++++++-- kernel/sched.c | 5 +++++ kernel/time/clockevents.c | 12 +++++++++++ kernel/timer.c | 17 +++++++++++++--- 6 files changed, 101 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3a1dbba4d3a..20a100fe2b4 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg); #endif #endif + +#ifdef CONFIG_GENERIC_CLOCKEVENTS +extern ktime_t clockevents_get_next_event(int cpu); +#else +static inline ktime_t clockevents_get_next_event(int cpu) +{ + return (ktime_t) { .tv64 = KTIME_MAX }; +} +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 61850401040..311dec12397 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p); extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); +extern int get_nohz_load_balancer(void); #else static inline int select_nohz_load_balancer(int cpu) { @@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos); #endif +#ifdef CONFIG_SCHED_DEBUG +static inline unsigned int get_sysctl_timer_migration(void) +{ + return sysctl_timer_migration; +} +#else +static inline unsigned int get_sysctl_timer_migration(void) +{ + return 1; +} +#endif extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c71bcd54924..b675a67c9ac 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include @@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, { struct hrtimer_clock_base *new_base; struct hrtimer_cpu_base *new_cpu_base; + int cpu, preferred_cpu = -1; + + cpu = smp_processor_id(); +#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) + if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { + preferred_cpu = get_nohz_load_balancer(); + if (preferred_cpu >= 0) + cpu = preferred_cpu; + } +#endif - new_cpu_base = &__get_cpu_var(hrtimer_bases); +again: + new_cpu_base = &per_cpu(hrtimer_bases, cpu); new_base = &new_cpu_base->clock_base[base->index]; if (base != new_base) { @@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, timer->base = NULL; spin_unlock(&base->cpu_base->lock); spin_lock(&new_base->cpu_base->lock); + + /* Optimized away for NOHZ=n SMP=n */ + if (cpu == preferred_cpu) { + /* Calculate clock monotonic expiry time */ +#ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), + new_base->offset); +#else + ktime_t expires = hrtimer_get_expires(timer); +#endif + + /* + * Get the next event on target cpu from the + * clock events layer. + * This covers the highres=off nohz=on case as well. + */ + ktime_t next = clockevents_get_next_event(cpu); + + ktime_t delta = ktime_sub(expires, next); + + /* + * We do not migrate the timer when it is expiring + * before the next event on the target cpu because + * we cannot reprogram the target cpu hardware and + * we would cause it to fire late. + */ + if (delta.tv64 < 0) { + cpu = smp_processor_id(); + spin_unlock(&new_base->cpu_base->lock); + spin_lock(&base->cpu_base->lock); + timer->base = base; + goto again; + } + } timer->base = new_base; } return new_base; @@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) return base; } -# define switch_hrtimer_base(t, b) (b) +# define switch_hrtimer_base(t, b, p) (b) #endif /* !CONFIG_SMP */ diff --git a/kernel/sched.c b/kernel/sched.c index 7f1dd56af86..9fe3774a0fd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4244,6 +4244,11 @@ static struct { .load_balancer = ATOMIC_INIT(-1), }; +int get_nohz_load_balancer(void) +{ + return atomic_read(&nohz.load_balancer); +} + /* * This routine will try to nominate the ilb (idle load balancing) * owner among the cpus whose ticks are stopped. ilb owner will do the idle diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index d13be216a79..ab20ded013b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -18,6 +18,7 @@ #include #include #include +#include /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); @@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg) spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_notify); + +ktime_t clockevents_get_next_event(int cpu) +{ + struct tick_device *td; + struct clock_event_device *dev; + + td = &per_cpu(tick_cpu_device, cpu); + dev = td->evtdev; + + return dev->next_event; +} #endif diff --git a/kernel/timer.c b/kernel/timer.c index 3424dfd11d5..3f841db5edf 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, { struct tvec_base *base, *new_base; unsigned long flags; - int ret; - - ret = 0; + int ret = 0 , cpu; timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); @@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, new_base = __get_cpu_var(tvec_bases); + cpu = smp_processor_id(); + +#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) + if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { + int preferred_cpu = get_nohz_load_balancer(); + + if (preferred_cpu >= 0) + cpu = preferred_cpu; + } +#endif + new_base = per_cpu(tvec_bases, cpu); + if (base != new_base) { /* * We are trying to schedule the timer on the local CPU. -- cgit v1.2.3-70-g09d2 From 08645126dd24872c2e27014f93968f7312e29176 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 May 2009 13:54:58 +0200 Subject: cfg80211: implement wext key handling Move key handling wireless extension ioctls from mac80211 to cfg80211 so that all drivers that implement the cfg80211 operations get wext compatibility. Note that this drops the SIOCGIWENCODE ioctl support for getting IW_ENCODE_RESTRICTED/IW_ENCODE_OPEN. This means that iwconfig will no longer report "Security mode:open" or "Security mode:restricted" for mac80211. However, what we displayed there (the authentication algo used) was actually wrong -- linux/wireless.h states that this setting is meant to differentiate between "Refuse non-encoded packets" and "Accept non-encoded packets". (Combined with "cfg80211: fix a couple of bugs with key ioctls". -- JWL) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 10 ++ net/mac80211/wext.c | 279 +-------------------------------------------- net/wireless/core.c | 6 +- net/wireless/core.h | 6 +- net/wireless/nl80211.c | 57 +++------ net/wireless/util.c | 45 ++++++++ net/wireless/wext-compat.c | 257 ++++++++++++++++++++++++++++++++++++++++- 7 files changed, 343 insertions(+), 317 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c748935ce5..e69e6c66dd1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1150,6 +1150,7 @@ struct wireless_dev { struct { struct cfg80211_ibss_params ibss; u8 bssid[ETH_ALEN]; + s8 default_key, default_mgmt_key; } wext; #endif }; @@ -1400,6 +1401,15 @@ int cfg80211_wext_siwretry(struct net_device *dev, int cfg80211_wext_giwretry(struct net_device *dev, struct iw_request_info *info, struct iw_param *retry, char *extra); +int cfg80211_wext_siwencodeext(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *extra); +int cfg80211_wext_siwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf); +int cfg80211_wext_giwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf); /* * callbacks for asynchronous cfg80211 methods, notification diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 6b4eb8d43a4..d8450264468 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,100 +27,6 @@ #include "aes_ccm.h" -static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta_addr, - int idx, int alg, int remove, - int set_tx_key, const u8 *_key, - size_t key_len) -{ - struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - struct ieee80211_key *key; - int err; - - if (alg == ALG_AES_CMAC) { - if (idx < NUM_DEFAULT_KEYS || - idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) { - printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d " - "(BIP)\n", sdata->dev->name, idx); - return -EINVAL; - } - } else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { - printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", - sdata->dev->name, idx); - return -EINVAL; - } - - if (remove) { - rcu_read_lock(); - - err = 0; - - if (is_broadcast_ether_addr(sta_addr)) { - key = sdata->keys[idx]; - } else { - sta = sta_info_get(local, sta_addr); - if (!sta) { - err = -ENOENT; - goto out_unlock; - } - key = sta->key; - } - - ieee80211_key_free(key); - } else { - key = ieee80211_key_alloc(alg, idx, key_len, _key); - if (!key) - return -ENOMEM; - - sta = NULL; - err = 0; - - rcu_read_lock(); - - if (!is_broadcast_ether_addr(sta_addr)) { - set_tx_key = 0; - /* - * According to the standard, the key index of a - * pairwise key must be zero. However, some AP are - * broken when it comes to WEP key indices, so we - * work around this. - */ - if (idx != 0 && alg != ALG_WEP) { - ieee80211_key_free(key); - err = -EINVAL; - goto out_unlock; - } - - sta = sta_info_get(local, sta_addr); - if (!sta) { - ieee80211_key_free(key); - err = -ENOENT; - goto out_unlock; - } - } - - if (alg == ALG_WEP && - key_len != LEN_WEP40 && key_len != LEN_WEP104) { - ieee80211_key_free(key); - err = -EINVAL; - goto out_unlock; - } - - ieee80211_key_link(key, sdata, sta); - - if (set_tx_key || (!sta && !sdata->default_key && key)) - ieee80211_set_default_key(sdata, idx); - if (alg == ALG_AES_CMAC && - (set_tx_key || (!sta && !sdata->default_mgmt_key && key))) - ieee80211_set_default_mgmt_key(sdata, idx); - } - - out_unlock: - rcu_read_unlock(); - - return err; -} - static int ieee80211_ioctl_siwgenie(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -472,109 +378,6 @@ static int ieee80211_ioctl_giwtxpower(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwencode(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *keybuf) -{ - struct ieee80211_sub_if_data *sdata; - int idx, i, alg = ALG_WEP; - u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - int remove = 0, ret; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - idx = erq->flags & IW_ENCODE_INDEX; - if (idx == 0) { - if (sdata->default_key) - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (sdata->default_key == sdata->keys[i]) { - idx = i; - break; - } - } - } else if (idx < 1 || idx > 4) - return -EINVAL; - else - idx--; - - if (erq->flags & IW_ENCODE_DISABLED) - remove = 1; - else if (erq->length == 0) { - /* No key data - just set the default TX key index */ - ieee80211_set_default_key(sdata, idx); - return 0; - } - - ret = ieee80211_set_encryption( - sdata, bcaddr, - idx, alg, remove, - !sdata->default_key, - keybuf, erq->length); - - if (!ret && sdata->vif.type == NL80211_IFTYPE_STATION) { - if (remove) - sdata->u.mgd.flags &= ~IEEE80211_STA_TKIP_WEP_USED; - else - sdata->u.mgd.flags |= IEEE80211_STA_TKIP_WEP_USED; - } - - return ret; -} - - -static int ieee80211_ioctl_giwencode(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *key) -{ - struct ieee80211_sub_if_data *sdata; - int idx, i; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - idx = erq->flags & IW_ENCODE_INDEX; - if (idx < 1 || idx > 4) { - idx = -1; - if (!sdata->default_key) - idx = 0; - else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (sdata->default_key == sdata->keys[i]) { - idx = i; - break; - } - } - if (idx < 0) - return -EINVAL; - } else - idx--; - - erq->flags = idx + 1; - - if (!sdata->keys[idx]) { - erq->length = 0; - erq->flags |= IW_ENCODE_DISABLED; - return 0; - } - - memcpy(key, sdata->keys[idx]->conf.key, - min_t(int, erq->length, sdata->keys[idx]->conf.keylen)); - erq->length = sdata->keys[idx]->conf.keylen; - erq->flags |= IW_ENCODE_ENABLED; - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - switch (sdata->u.mgd.auth_alg) { - case WLAN_AUTH_OPEN: - case WLAN_AUTH_LEAP: - erq->flags |= IW_ENCODE_OPEN; - break; - case WLAN_AUTH_SHARED_KEY: - erq->flags |= IW_ENCODE_RESTRICTED; - break; - } - } - - return 0; -} - static int ieee80211_ioctl_siwpower(struct net_device *dev, struct iw_request_info *info, struct iw_param *wrq, @@ -809,82 +612,6 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, } -static int ieee80211_ioctl_siwencodeext(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; - int uninitialized_var(alg), idx, i, remove = 0; - - switch (ext->alg) { - case IW_ENCODE_ALG_NONE: - remove = 1; - break; - case IW_ENCODE_ALG_WEP: - alg = ALG_WEP; - break; - case IW_ENCODE_ALG_TKIP: - alg = ALG_TKIP; - break; - case IW_ENCODE_ALG_CCMP: - alg = ALG_CCMP; - break; - case IW_ENCODE_ALG_AES_CMAC: - alg = ALG_AES_CMAC; - break; - default: - return -EOPNOTSUPP; - } - - if (erq->flags & IW_ENCODE_DISABLED) - remove = 1; - - idx = erq->flags & IW_ENCODE_INDEX; - if (alg == ALG_AES_CMAC) { - if (idx < NUM_DEFAULT_KEYS + 1 || - idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) { - idx = -1; - if (!sdata->default_mgmt_key) - idx = 0; - else for (i = NUM_DEFAULT_KEYS; - i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS; - i++) { - if (sdata->default_mgmt_key == sdata->keys[i]) - { - idx = i; - break; - } - } - if (idx < 0) - return -EINVAL; - } else - idx--; - } else { - if (idx < 1 || idx > 4) { - idx = -1; - if (!sdata->default_key) - idx = 0; - else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (sdata->default_key == sdata->keys[i]) { - idx = i; - break; - } - } - if (idx < 0) - return -EINVAL; - } else - idx--; - } - - return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, - remove, - ext->ext_flags & - IW_ENCODE_EXT_SET_TX_KEY, - ext->key, ext->key_len); -} - - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -931,8 +658,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ (iw_handler) cfg80211_wext_siwretry, /* SIOCSIWRETRY */ (iw_handler) cfg80211_wext_giwretry, /* SIOCGIWRETRY */ - (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ - (iw_handler) ieee80211_ioctl_giwencode, /* SIOCGIWENCODE */ + (iw_handler) cfg80211_wext_siwencode, /* SIOCSIWENCODE */ + (iw_handler) cfg80211_wext_giwencode, /* SIOCGIWENCODE */ (iw_handler) ieee80211_ioctl_siwpower, /* SIOCSIWPOWER */ (iw_handler) ieee80211_ioctl_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ @@ -941,7 +668,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWGENIE */ (iw_handler) ieee80211_ioctl_siwauth, /* SIOCSIWAUTH */ (iw_handler) ieee80211_ioctl_giwauth, /* SIOCGIWAUTH */ - (iw_handler) ieee80211_ioctl_siwencodeext, /* SIOCSIWENCODEEXT */ + (iw_handler) cfg80211_wext_siwencodeext, /* SIOCSIWENCODEEXT */ (iw_handler) NULL, /* SIOCGIWENCODEEXT */ (iw_handler) NULL, /* SIOCSIWPMKSA */ (iw_handler) NULL, /* -- hole -- */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 15b2a179480..47c20eb0c04 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1,7 +1,7 @@ /* * This is the linux wireless configuration interface. * - * Copyright 2006-2008 Johannes Berg + * Copyright 2006-2009 Johannes Berg */ #include @@ -457,6 +457,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, "symlink to netdev!\n"); } dev->ieee80211_ptr->netdev = dev; +#ifdef CONFIG_WIRELESS_EXT + dev->ieee80211_ptr->wext.default_key = -1; + dev->ieee80211_ptr->wext.default_mgmt_key = -1; +#endif mutex_unlock(&rdev->devlist_mtx); break; case NETDEV_GOING_DOWN: diff --git a/net/wireless/core.h b/net/wireless/core.h index 3e49d339931..f14b6c5f422 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -1,7 +1,7 @@ /* * Wireless configuration interface internals. * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006-2009 Johannes Berg */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -151,4 +151,8 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); +/* internal helpers */ +int cfg80211_validate_key_settings(struct key_params *params, int key_idx, + const u8 *mac_addr); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a39e4644778..f88dbbec752 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1,7 +1,7 @@ /* * This is the new netlink-based wireless configuration interface. * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006-2009 Johannes Berg */ #include @@ -1073,6 +1073,14 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) } err = func(&drv->wiphy, dev, key_idx); +#ifdef CONFIG_WIRELESS_EXT + if (!err) { + if (func == drv->ops->set_default_key) + dev->ieee80211_ptr->wext.default_key = key_idx; + else + dev->ieee80211_ptr->wext.default_mgmt_key = key_idx; + } +#endif out: cfg80211_put_dev(drv); @@ -1111,45 +1119,9 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (key_idx > 5) + if (cfg80211_validate_key_settings(¶ms, key_idx, mac_addr)) return -EINVAL; - /* - * Disallow pairwise keys with non-zero index unless it's WEP - * (because current deployments use pairwise WEP keys with - * non-zero indizes but 802.11i clearly specifies to use zero) - */ - if (mac_addr && key_idx && - params.cipher != WLAN_CIPHER_SUITE_WEP40 && - params.cipher != WLAN_CIPHER_SUITE_WEP104) - return -EINVAL; - - /* TODO: add definitions for the lengths to linux/ieee80211.h */ - switch (params.cipher) { - case WLAN_CIPHER_SUITE_WEP40: - if (params.key_len != 5) - return -EINVAL; - break; - case WLAN_CIPHER_SUITE_TKIP: - if (params.key_len != 32) - return -EINVAL; - break; - case WLAN_CIPHER_SUITE_CCMP: - if (params.key_len != 16) - return -EINVAL; - break; - case WLAN_CIPHER_SUITE_WEP104: - if (params.key_len != 13) - return -EINVAL; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - if (params.key_len != 16) - return -EINVAL; - break; - default: - return -EINVAL; - } - rtnl_lock(); err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); @@ -1210,6 +1182,15 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr); +#ifdef CONFIG_WIRELESS_EXT + if (!err) { + if (key_idx == dev->ieee80211_ptr->wext.default_key) + dev->ieee80211_ptr->wext.default_key = -1; + else if (key_idx == dev->ieee80211_ptr->wext.default_mgmt_key) + dev->ieee80211_ptr->wext.default_mgmt_key = -1; + } +#endif + out: cfg80211_put_dev(drv); dev_put(dev); diff --git a/net/wireless/util.c b/net/wireless/util.c index 5f7e997195c..beb226e78cd 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -138,3 +138,48 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy) if (wiphy->bands[band]) set_mandatory_flags_band(wiphy->bands[band], band); } + +int cfg80211_validate_key_settings(struct key_params *params, int key_idx, + const u8 *mac_addr) +{ + if (key_idx > 5) + return -EINVAL; + + /* + * Disallow pairwise keys with non-zero index unless it's WEP + * (because current deployments use pairwise WEP keys with + * non-zero indizes but 802.11i clearly specifies to use zero) + */ + if (mac_addr && key_idx && + params->cipher != WLAN_CIPHER_SUITE_WEP40 && + params->cipher != WLAN_CIPHER_SUITE_WEP104) + return -EINVAL; + + /* TODO: add definitions for the lengths to linux/ieee80211.h */ + switch (params->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + if (params->key_len != 5) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_TKIP: + if (params->key_len != 32) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_CCMP: + if (params->key_len != 16) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_WEP104: + if (params->key_len != 13) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + if (params->key_len != 16) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index abf6b0a047d..ffc98a8d6e5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -5,12 +5,13 @@ * into cfg80211, when that happens all the exports here go away and * we directly assign the wireless handlers of wireless interfaces. * - * Copyright 2008 Johannes Berg + * Copyright 2008-2009 Johannes Berg */ #include #include #include +#include #include #include #include "core.h" @@ -477,3 +478,257 @@ int cfg80211_wext_giwretry(struct net_device *dev, return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); + +static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + bool remove, bool tx_key, int idx, + struct key_params *params) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (!rdev->ops->set_default_mgmt_key) + return -EOPNOTSUPP; + + if (idx < 4 || idx > 5) + return -EINVAL; + } else if (idx < 0 || idx > 3) + return -EINVAL; + + if (remove) { + err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); + if (!err) { + if (idx == wdev->wext.default_key) + wdev->wext.default_key = -1; + else if (idx == wdev->wext.default_mgmt_key) + wdev->wext.default_mgmt_key = -1; + } + return err; + } else { + if (addr) + tx_key = false; + + if (cfg80211_validate_key_settings(params, idx, addr)) + return -EINVAL; + + err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params); + if (err) + return err; + + if (tx_key || (!addr && wdev->wext.default_key == -1)) { + err = rdev->ops->set_default_key(&rdev->wiphy, + dev, idx); + if (!err) + wdev->wext.default_key = idx; + return err; + } + + if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && + (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { + err = rdev->ops->set_default_mgmt_key(&rdev->wiphy, + dev, idx); + if (!err) + wdev->wext.default_mgmt_key = idx; + return err; + } + + return 0; + } +} + +int cfg80211_wext_siwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int idx, err; + bool remove = false; + struct key_params params; + + /* no use -- only MFP (set_default_mgmt_key) is optional */ + if (!rdev->ops->del_key || + !rdev->ops->add_key || + !rdev->ops->set_default_key) + return -EOPNOTSUPP; + + idx = erq->flags & IW_ENCODE_INDEX; + if (idx == 0) { + idx = wdev->wext.default_key; + if (idx < 0) + idx = 0; + } else if (idx < 1 || idx > 4) + return -EINVAL; + else + idx--; + + if (erq->flags & IW_ENCODE_DISABLED) + remove = true; + else if (erq->length == 0) { + /* No key data - just set the default TX key index */ + err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); + if (!err) + wdev->wext.default_key = idx; + return err; + } + + memset(¶ms, 0, sizeof(params)); + params.key = keybuf; + params.key_len = erq->length; + if (erq->length == 5) + params.cipher = WLAN_CIPHER_SUITE_WEP40; + else if (erq->length == 13) + params.cipher = WLAN_CIPHER_SUITE_WEP104; + else if (!remove) + return -EINVAL; + + return cfg80211_set_encryption(rdev, dev, NULL, remove, + wdev->wext.default_key == -1, + idx, ¶ms); +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwencode); + +int cfg80211_wext_siwencodeext(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; + const u8 *addr; + int idx; + bool remove = false; + struct key_params params; + u32 cipher; + + /* no use -- only MFP (set_default_mgmt_key) is optional */ + if (!rdev->ops->del_key || + !rdev->ops->add_key || + !rdev->ops->set_default_key) + return -EOPNOTSUPP; + + switch (ext->alg) { + case IW_ENCODE_ALG_NONE: + remove = true; + cipher = 0; + break; + case IW_ENCODE_ALG_WEP: + if (ext->key_len == 5) + cipher = WLAN_CIPHER_SUITE_WEP40; + else if (ext->key_len == 13) + cipher = WLAN_CIPHER_SUITE_WEP104; + else + return -EINVAL; + break; + case IW_ENCODE_ALG_TKIP: + cipher = WLAN_CIPHER_SUITE_TKIP; + break; + case IW_ENCODE_ALG_CCMP: + cipher = WLAN_CIPHER_SUITE_CCMP; + break; + case IW_ENCODE_ALG_AES_CMAC: + cipher = WLAN_CIPHER_SUITE_AES_CMAC; + break; + default: + return -EOPNOTSUPP; + } + + if (erq->flags & IW_ENCODE_DISABLED) + remove = true; + + idx = erq->flags & IW_ENCODE_INDEX; + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (idx < 4 || idx > 5) { + idx = wdev->wext.default_mgmt_key; + if (idx < 0) + return -EINVAL; + } else + idx--; + } else { + if (idx < 1 || idx > 4) { + idx = wdev->wext.default_key; + if (idx < 0) + return -EINVAL; + } else + idx--; + } + + addr = ext->addr.sa_data; + if (is_broadcast_ether_addr(addr)) + addr = NULL; + + memset(¶ms, 0, sizeof(params)); + params.key = ext->key; + params.key_len = ext->key_len; + params.cipher = cipher; + + return cfg80211_set_encryption( + rdev, dev, addr, remove, + ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, + idx, ¶ms); +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwencodeext); + +struct giwencode_cookie { + size_t buflen; + char *keybuf; +}; + +static void giwencode_get_key_cb(void *cookie, struct key_params *params) +{ + struct giwencode_cookie *data = cookie; + + if (!params->key) { + data->buflen = 0; + return; + } + + data->buflen = min_t(size_t, data->buflen, params->key_len); + memcpy(data->keybuf, params->key, data->buflen); +} + +int cfg80211_wext_giwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int idx, err; + struct giwencode_cookie data = { + .keybuf = keybuf, + .buflen = erq->length, + }; + + if (!rdev->ops->get_key) + return -EOPNOTSUPP; + + idx = erq->flags & IW_ENCODE_INDEX; + if (idx == 0) { + idx = wdev->wext.default_key; + if (idx < 0) + idx = 0; + } else if (idx < 1 || idx > 4) + return -EINVAL; + else + idx--; + + erq->flags = idx + 1; + + err = rdev->ops->get_key(&rdev->wiphy, dev, idx, NULL, &data, + giwencode_get_key_cb); + if (!err) { + erq->length = data.buflen; + erq->flags |= IW_ENCODE_ENABLED; + return 0; + } + + if (err == -ENOENT) { + erq->flags |= IW_ENCODE_DISABLED; + erq->length = 0; + return 0; + } + + return err; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); -- cgit v1.2.3-70-g09d2 From eccb8e8f0c3af47aeb6dbe4012eb8d4fc888767a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 May 2009 21:57:56 +0300 Subject: nl80211: improve station flags handling It is currently not possible to modify station flags, but that capability would be very useful. This patch introduces a new nl80211 attribute that contains a set/mask for station flags, and updates the internal API (and mac80211) to mirror that. The new attribute is parsed before falling back to the old so that userspace can specify both (if it can) to work on all kernels. Signed-off-by: Johannes Berg Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 21 ++++++++++++++++++++- include/net/cfg80211.h | 28 +++++----------------------- net/mac80211/cfg.c | 28 ++++++++++++++++------------ net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++-------- 4 files changed, 71 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 58c4ee1822d..aeefccfac0e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -25,6 +25,8 @@ * */ +#include + /** * DOC: Station handling * @@ -380,7 +382,7 @@ enum nl80211_commands { * * @NL80211_ATTR_STA_AID: Association ID for the station (u16) * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of - * &enum nl80211_sta_flags. + * &enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2) * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by * IEEE 802.11 7.3.1.6 (u16). * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported @@ -499,6 +501,9 @@ enum nl80211_commands { * this attribute can be used * with %NL80211_CMD_ASSOCIATE request * + * @NL80211_ATTR_STA_FLAGS2: Attribute containing a + * &struct nl80211_sta_flag_update. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -603,6 +608,8 @@ enum nl80211_attrs { NL80211_ATTR_USE_MFP, + NL80211_ATTR_STA_FLAGS2, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -691,6 +698,18 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1 }; +/** + * struct nl80211_sta_flag_update - station flags mask/set + * @mask: mask of station flags to set + * @set: which values to set them to + * + * Both mask and set contain bits as per &enum nl80211_sta_flags. + */ +struct nl80211_sta_flag_update { + __u32 mask; + __u32 set; +} __attribute__((packed)); + /** * enum nl80211_rate_info - bitrate information * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e69e6c66dd1..0dae6b38294 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -251,27 +251,6 @@ struct beacon_parameters { int head_len, tail_len; }; -/** - * enum station_flags - station flags - * - * Station capability flags. Note that these must be the bits - * according to the nl80211 flags. - * - * @STATION_FLAG_CHANGED: station flags were changed - * @STATION_FLAG_AUTHORIZED: station is authorized to send frames (802.1X) - * @STATION_FLAG_SHORT_PREAMBLE: station is capable of receiving frames - * with short preambles - * @STATION_FLAG_WME: station is WME/QoS capable - * @STATION_FLAG_MFP: station uses management frame protection - */ -enum station_flags { - STATION_FLAG_CHANGED = 1<<0, - STATION_FLAG_AUTHORIZED = 1<sdata; + u32 mask, set; sband = local->hw.wiphy->bands[local->oper_channel->band]; - /* - * FIXME: updating the flags is racy when this function is - * called from ieee80211_change_station(), this will - * be resolved in a future patch. - */ + spin_lock_bh(&sta->lock); + mask = params->sta_flags_mask; + set = params->sta_flags_set; - if (params->station_flags & STATION_FLAG_CHANGED) { - spin_lock_bh(&sta->lock); + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { sta->flags &= ~WLAN_STA_AUTHORIZED; - if (params->station_flags & STATION_FLAG_AUTHORIZED) + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) sta->flags |= WLAN_STA_AUTHORIZED; + } + if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { sta->flags &= ~WLAN_STA_SHORT_PREAMBLE; - if (params->station_flags & STATION_FLAG_SHORT_PREAMBLE) + if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) sta->flags |= WLAN_STA_SHORT_PREAMBLE; + } + if (mask & BIT(NL80211_STA_FLAG_WME)) { sta->flags &= ~WLAN_STA_WME; - if (params->station_flags & STATION_FLAG_WME) + if (set & BIT(NL80211_STA_FLAG_WME)) sta->flags |= WLAN_STA_WME; + } + if (mask & BIT(NL80211_STA_FLAG_MFP)) { sta->flags &= ~WLAN_STA_MFP; - if (params->station_flags & STATION_FLAG_MFP) + if (set & BIT(NL80211_STA_FLAG_MFP)) sta->flags |= WLAN_STA_MFP; - spin_unlock_bh(&sta->lock); } + spin_unlock_bh(&sta->lock); /* * FIXME: updating the following information is racy when this diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2353ddbf493..66024ef57ba 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -123,6 +123,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, [NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG }, [NL80211_ATTR_USE_MFP] = { .type = NLA_U32 }, + [NL80211_ATTR_STA_FLAGS2] = { + .len = sizeof(struct nl80211_sta_flag_update), + }, }; /* IE validation */ @@ -1334,13 +1337,33 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, }; -static int parse_station_flags(struct nlattr *nla, u32 *staflags) +static int parse_station_flags(struct genl_info *info, + struct station_parameters *params) { struct nlattr *flags[NL80211_STA_FLAG_MAX + 1]; + struct nlattr *nla; int flag; - *staflags = 0; + /* + * Try parsing the new attribute first so userspace + * can specify both for older kernels. + */ + nla = info->attrs[NL80211_ATTR_STA_FLAGS2]; + if (nla) { + struct nl80211_sta_flag_update *sta_flags; + + sta_flags = nla_data(nla); + params->sta_flags_mask = sta_flags->mask; + params->sta_flags_set = sta_flags->set; + if ((params->sta_flags_mask | + params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) + return -EINVAL; + return 0; + } + + /* if present, parse the old attribute */ + nla = info->attrs[NL80211_ATTR_STA_FLAGS]; if (!nla) return 0; @@ -1348,11 +1371,12 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags) nla, sta_flags_policy)) return -EINVAL; - *staflags = STATION_FLAG_CHANGED; + params->sta_flags_mask = (1 << __NL80211_STA_FLAG_AFTER_LAST) - 1; + params->sta_flags_mask &= ~1; for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++) if (flags[flag]) - *staflags |= (1<sta_flags_set |= (1<attrs[NL80211_ATTR_HT_CAPABILITY]); - if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], - ¶ms.station_flags)) + if (parse_station_flags(info, ¶ms)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) @@ -1718,8 +1741,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); - if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], - ¶ms.station_flags)) + if (parse_station_flags(info, ¶ms)) return -EINVAL; rtnl_lock(); -- cgit v1.2.3-70-g09d2 From 3f77316c6b99f596bfbf72c0542f47f7230b702e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 11 May 2009 21:57:57 +0300 Subject: nl80211: Add IEEE 802.1X PAE control for station mode Add a new NL80211_ATTR_CONTROL_PORT flag for NL80211_CMD_ASSOCIATE to allow user space to indicate that it will control the IEEE 802.1X port in station mode. Previously, mac80211 was always marking the port authorized in station mode. This was enough when drop_unencrypted flag was set. However, drop_unencrypted can currently be controlled only with WEXT and the current nl80211 design does not allow fully secure configuration. Fix this by providing a mechanism for user space to control the IEEE 802.1X port in station mode (i.e., do the same that we are already doing in AP mode). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 5 +++++ net/mac80211/cfg.c | 5 +++++ net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 5 +++-- net/mac80211/wext.c | 3 +++ net/wireless/nl80211.c | 3 +++ 7 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index aeefccfac0e..2781525b03d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -504,6 +504,13 @@ enum nl80211_commands { * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. * + * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls + * IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in + * station mode. If the flag is included in %NL80211_CMD_ASSOCIATE + * request, the driver will assume that the port is unauthorized until + * authorized by user space. Otherwise, port is marked authorized by + * default in station mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -610,6 +617,8 @@ enum nl80211_attrs { NL80211_ATTR_STA_FLAGS2, + NL80211_ATTR_CONTROL_PORT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0dae6b38294..9e17a83d343 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -655,6 +655,10 @@ struct cfg80211_auth_request { * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association + * @control_port: Whether user space controls IEEE 802.1X port, i.e., + * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is + * required to assume that the port is unauthorized until authorized by + * user space. Otherwise, port is marked authorized by default. */ struct cfg80211_assoc_request { struct ieee80211_channel *chan; @@ -664,6 +668,7 @@ struct cfg80211_assoc_request { const u8 *ie; size_t ie_len; bool use_mfp; + bool control_port; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d591a936f5c..6464bfd232c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1265,6 +1265,11 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; } + if (req->control_port) + sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT; + else + sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; + sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME; sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE; ieee80211_sta_req_auth(sdata); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 081c5742730..56a49ef446c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -235,7 +235,7 @@ struct mesh_preq_queue { #define IEEE80211_STA_ASSOCIATED BIT(4) #define IEEE80211_STA_PROBEREQ_POLL BIT(5) #define IEEE80211_STA_CREATE_IBSS BIT(6) -/* hole at 7, please re-use */ +#define IEEE80211_STA_CONTROL_PORT BIT(7) #define IEEE80211_STA_WMM_ENABLED BIT(8) /* hole at 9, please re-use */ #define IEEE80211_STA_AUTO_SSID_SEL BIT(10) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6d00e3f738c..2806f6af7ae 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1581,8 +1581,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * to between the sta_info_alloc() and sta_info_insert() above. */ - set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP | - WLAN_STA_AUTHORIZED); + set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP); + if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) + set_sta_flags(sta, WLAN_STA_AUTHORIZED); rates = 0; basic_rates = 0; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index d8450264468..c14394744a9 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -41,6 +41,7 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, return ret; sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; + sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; ieee80211_sta_req_auth(sdata); return 0; } @@ -124,6 +125,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, return ret; sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; + sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; ieee80211_sta_req_auth(sdata); return 0; } @@ -181,6 +183,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, if (ret) return ret; sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; + sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; ieee80211_sta_req_auth(sdata); return 0; } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 66024ef57ba..cad281390cf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -126,6 +126,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_STA_FLAGS2] = { .len = sizeof(struct nl80211_sta_flag_update), }, + [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, }; /* IE validation */ @@ -3040,6 +3041,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } + req.control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + err = drv->ops->assoc(&drv->wiphy, dev, &req); out: -- cgit v1.2.3-70-g09d2 From faa8fdc85347cc76d87b43ea718785661c54f656 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 11 May 2009 21:57:58 +0300 Subject: nl80211: Add RSC configuration for new keys When setting a key with NL80211_CMD_NEW_KEY, we should allow the key sequence number (RSC) to be set in order to allow replay protection to work correctly for group keys. This patch documents this use for nl80211 and adds the couple of missing pieces in nl80211/cfg80211 and mac80211 to support this. In addition, WEXT SIOCSIWENCODEEXT compat processing in cfg80211 is extended to handle the RSC (this was already specified in WEXT, but just not implemented in cfg80211/mac80211). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++-- net/mac80211/cfg.c | 3 ++- net/mac80211/key.c | 21 ++++++++++++++++++++- net/mac80211/key.h | 3 ++- net/wireless/nl80211.c | 5 +++++ net/wireless/wext-compat.c | 5 +++++ 6 files changed, 36 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2781525b03d..dbea93b694e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -79,8 +79,8 @@ * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, - * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC and %NL80211_ATTR_KEY_CIPHER - * attributes. + * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, + * and %NL80211_ATTR_KEY_SEQ attributes. * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX * or %NL80211_ATTR_MAC. * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6464bfd232c..77e9ff5ec4f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -141,7 +141,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key); + key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, + params->seq_len, params->seq); if (!key) return -ENOMEM; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b7e1350273b..827ea8e6ee0 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -290,9 +290,11 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, int idx, size_t key_len, - const u8 *key_data) + const u8 *key_data, + size_t seq_len, const u8 *seq) { struct ieee80211_key *key; + int i, j; BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); @@ -318,14 +320,31 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, case ALG_TKIP: key->conf.iv_len = TKIP_IV_LEN; key->conf.icv_len = TKIP_ICV_LEN; + if (seq && seq_len == 6) { + for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { + key->u.tkip.rx[i].iv32 = + get_unaligned_le32(&seq[2]); + key->u.tkip.rx[i].iv16 = + get_unaligned_le16(seq); + } + } break; case ALG_CCMP: key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; + if (seq && seq_len == CCMP_PN_LEN) { + for (i = 0; i < NUM_RX_DATA_QUEUES; i++) + for (j = 0; j < CCMP_PN_LEN; j++) + key->u.ccmp.rx_pn[i][j] = + seq[CCMP_PN_LEN - j - 1]; + } break; case ALG_AES_CMAC: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); + if (seq && seq_len == 6) + for (j = 0; j < 6; j++) + key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1]; break; } memcpy(key->conf.key, key_data, key_len); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 215d3ef42a4..9572e00f532 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -144,7 +144,8 @@ struct ieee80211_key { struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, int idx, size_t key_len, - const u8 *key_data); + const u8 *key_data, + size_t seq_len, const u8 *seq); /* * Insert a key into data structures (sdata, sta if necessary) * to make it used, free old key. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cad281390cf..f0fec2f4982 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1115,6 +1115,11 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); } + if (info->attrs[NL80211_ATTR_KEY_SEQ]) { + params.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); + params.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); + } + if (info->attrs[NL80211_ATTR_KEY_IDX]) key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index ffc98a8d6e5..f98090b90fb 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -663,6 +663,11 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, params.key_len = ext->key_len; params.cipher = cipher; + if (ext->ext_flags & IW_ENCODE_EXT_RX_SEQ_VALID) { + params.seq = ext->rx_seq; + params.seq_len = 6; + } + return cfg80211_set_encryption( rdev, dev, addr, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, -- cgit v1.2.3-70-g09d2 From d34c43078236b41146877c49af341ab85b5fb8db Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Wed, 13 May 2009 21:59:14 -0400 Subject: ASoC: Add SNDRV_PCM_FMTBIT_S32_BE as a valid AC97 format Signed-off-by: Jon Smirl Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 496dc30457b..352d7eee9b6 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -79,7 +79,8 @@ struct snd_pcm_substream; #define SND_SOC_CLOCK_OUT 1 #define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S32_LE) + SNDRV_PCM_FMTBIT_S32_LE |\ + SNDRV_PCM_FMTBIT_S32_BE) struct snd_soc_dai_ops; struct snd_soc_dai; -- cgit v1.2.3-70-g09d2 From 9fc20f030ba457d20eb994e1def7e2ce7d5ae1a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 May 2009 15:14:18 +0200 Subject: ALSA: ctxfi - Move PCI ID definitions to linux/pci_ids.h Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 7 +++++++ sound/pci/ctxfi/ctatc.c | 17 +++++++++-------- sound/pci/ctxfi/ctdrv.h | 30 ------------------------------ sound/pci/ctxfi/xfi.c | 6 +++--- 4 files changed, 19 insertions(+), 41 deletions(-) delete mode 100644 sound/pci/ctxfi/ctdrv.h (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a..61915313898 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1314,6 +1314,13 @@ #define PCI_VENDOR_ID_CREATIVE 0x1102 /* duplicate: ECTIVA */ #define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002 +#define PCI_DEVICE_ID_CREATIVE_20K1 0x0005 +#define PCI_DEVICE_ID_CREATIVE_20K2 0x000b +#define PCI_SUBDEVICE_ID_CREATIVE_SB0760 0x0024 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08801 0x0041 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08802 0x0042 +#define PCI_SUBDEVICE_ID_CREATIVE_SB08803 0x0043 +#define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX 0x6000 #define PCI_VENDOR_ID_ECTIVA 0x1102 /* duplicate: CREATIVE */ #define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938 diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index 5f35374863f..073fe2a59da 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -18,7 +18,6 @@ #include "ctatc.h" #include "ctpcm.h" #include "ctmixer.h" -#include "ctdrv.h" #include "cthardware.h" #include "ctsrc.h" #include "ctamixer.h" @@ -40,23 +39,25 @@ | ((IEC958_AES3_CON_FS_48000) << 24)) static const struct ct_atc_chip_sub_details atc_sub_details[NUM_CTCARDS] = { - [CTSB0760] = {.subsys = PCI_SUBSYS_CREATIVE_SB0760, + [CTSB0760] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB0760, .nm_model = "SB076x"}, - [CTHENDRIX] = {.subsys = PCI_SUBSYS_CREATIVE_HENDRIX, + [CTHENDRIX] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, .nm_model = "Hendrix"}, - [CTSB08801] = {.subsys = PCI_SUBSYS_CREATIVE_SB08801, + [CTSB08801] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08801, .nm_model = "SB0880"}, - [CTSB08802] = {.subsys = PCI_SUBSYS_CREATIVE_SB08802, + [CTSB08802] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08802, .nm_model = "SB0880"}, - [CTSB08803] = {.subsys = PCI_SUBSYS_CREATIVE_SB08803, + [CTSB08803] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08803, .nm_model = "SB0880"} }; static struct ct_atc_chip_details atc_chip_details[] = { - {.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K1, + {.vendor = PCI_VENDOR_ID_CREATIVE, + .device = PCI_DEVICE_ID_CREATIVE_20K1, .sub_details = NULL, .nm_card = "X-Fi 20k1"}, - {.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K2, + {.vendor = PCI_VENDOR_ID_CREATIVE, + .device = PCI_DEVICE_ID_CREATIVE_20K2, .sub_details = atc_sub_details, .nm_card = "X-Fi 20k2"}, {} /* terminator */ diff --git a/sound/pci/ctxfi/ctdrv.h b/sound/pci/ctxfi/ctdrv.h deleted file mode 100644 index f776a44f52c..00000000000 --- a/sound/pci/ctxfi/ctdrv.h +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved. - * - * This source file is released under GPL v2 license (no other versions). - * See the COPYING file included in the main directory of this source - * distribution for the license terms and conditions. - * - * @file ctdrv.h - * - * @breaf - * This file contains the definition of card IDs supported by this driver. - * - * @author Liu Chun - * - */ - -#ifndef CTDRV_H -#define CTDRV_H - -#define PCI_VENDOR_CREATIVE 0x1102 -#define PCI_DEVICE_CREATIVE_20K1 0x0005 -#define PCI_DEVICE_CREATIVE_20K2 0x000B -#define PCI_SUBVENDOR_CREATIVE 0x1102 -#define PCI_SUBSYS_CREATIVE_SB0760 0x0024 -#define PCI_SUBSYS_CREATIVE_SB08801 0x0041 -#define PCI_SUBSYS_CREATIVE_SB08802 0x0042 -#define PCI_SUBSYS_CREATIVE_SB08803 0x0043 -#define PCI_SUBSYS_CREATIVE_HENDRIX 0x6000 - -#endif /* CTDRV_H */ diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c index f9114403651..b7368fded53 100644 --- a/sound/pci/ctxfi/xfi.c +++ b/sound/pci/ctxfi/xfi.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include #include "ctatc.h" -#include "ctdrv.h" MODULE_AUTHOR("Creative Technology Ltd"); MODULE_DESCRIPTION("X-Fi driver version 1.03"); @@ -32,8 +32,8 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; static struct pci_device_id ct_pci_dev_ids[] = { /* only X-Fi is supported, so... */ - { PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K1) }, - { PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K2) }, + { PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1) }, + { PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2) }, { 0, } }; MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids); -- cgit v1.2.3-70-g09d2 From 077e6dba20e74a455a0452379d2a965c7e1b01ad Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 1 May 2009 21:21:02 +0200 Subject: ide-atapi: switch to rq->resid_len Now that we have rq->resid_len, use it to account partial completion amount during the lifetime of an rq, decrementing it on each successful transfer. As a result, get rid of now unused pc->xferred. While at it, remove noisy debug call in ide_prep_sense. Signed-off-by: Borislav Petkov --- drivers/ide/ide-atapi.c | 19 ++++++++----------- drivers/ide/ide-tape.c | 12 +++++------- include/linux/ide.h | 2 -- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 7129495b3e4..1022e421abd 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -172,8 +172,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) unsigned int cmd_len, sense_len; int err; - debug_log("%s: enter\n", __func__); - switch (drive->media) { case ide_floppy: cmd_len = 255; @@ -370,7 +368,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; } else - pc->xferred = blk_rq_bytes(rq); + rq->resid_len = 0; debug_log("%s: DMA finished\n", drive->name); } @@ -379,7 +377,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) int uptodate, error; debug_log("Packet command completed, %d bytes transferred\n", - pc->xferred); + blk_rq_bytes(rq)); pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS; @@ -467,15 +465,15 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) ide_pio_bytes(drive, cmd, write, done); /* Update transferred byte count */ - pc->xferred += done; + rq->resid_len -= done; bcount -= done; if (bcount) ide_pad_transfer(drive, write, bcount); - debug_log("[cmd %x] transferred %d bytes, padded %d bytes\n", - rq->cmd[0], done, bcount); + debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n", + rq->cmd[0], done, bcount, rq->resid_len); /* And set the interrupt handler again */ ide_set_handler(drive, ide_pc_intr, timeout); @@ -643,16 +641,15 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) } else { pc = drive->pc; - /* We haven't transferred any data yet */ - pc->xferred = 0; - valid_tf = IDE_VALID_DEVICE; bytes = blk_rq_bytes(rq); - bcount = ((drive->media == ide_tape) ? bytes : min_t(unsigned int, bytes, 63 * 1024)); + /* We haven't transferred any data yet */ + rq->resid_len = bcount; + if (pc->flags & PC_FLAG_DMA_ERROR) { pc->flags &= ~PC_FLAG_DMA_ERROR; ide_dma_off(drive); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index aaeef12f80a..c9337099797 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -301,11 +301,9 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) debug_log(DBG_ERR, "pc = %x, sense key = %x, asc = %x, ascq = %x\n", pc->c[0], tape->sense_key, tape->asc, tape->ascq); - /* Correct pc->xferred by asking the tape. */ + /* correct remaining bytes to transfer */ if (pc->flags & PC_FLAG_DMA_ERROR) - pc->xferred = blk_rq_bytes(rq) - - tape->blk_size * - get_unaligned_be32(&sense[3]); + rq->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]); /* * If error was the result of a zero-length read or write command, @@ -339,7 +337,7 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) pc->flags |= PC_FLAG_ABORT; } if (!(pc->flags & PC_FLAG_ABORT) && - pc->xferred) + (blk_rq_bytes(rq) - rq->resid_len)) pc->retries = IDETAPE_MAX_PC_RETRIES + 1; } } @@ -369,7 +367,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) printk(KERN_ERR "ide-tape: Error in REQUEST SENSE " "itself - Aborting request!\n"); } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) { - int blocks = pc->xferred / tape->blk_size; + unsigned int blocks = + (blk_rq_bytes(rq) - rq->resid_len) / tape->blk_size; tape->avg_size += blocks * tape->blk_size; @@ -381,7 +380,6 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->resid_len = blk_rq_bytes(rq) - blocks * tape->blk_size; if (pc->error) { uptodate = 0; diff --git a/include/linux/ide.h b/include/linux/ide.h index 34c128f0a33..745a393af27 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -357,8 +357,6 @@ struct ide_atapi_pc { /* bytes to transfer */ int req_xfer; - /* bytes actually transferred */ - int xferred; /* data buffer */ u8 *buf; -- cgit v1.2.3-70-g09d2 From 5a0e43b5e2ee9a295f864c38f0e853b1a4fc3892 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 1 May 2009 23:27:11 +0200 Subject: ide-atapi: add a len-parameter to ide_queue_pc_tail This is in preparation for removing ide_atapi_pc. There should be no functional change resulting from this patch. Signed-off-by: Borislav Petkov --- drivers/ide/ide-atapi.c | 12 ++++++------ drivers/ide/ide-floppy.c | 4 ++-- drivers/ide/ide-floppy_ioctl.c | 8 ++++---- drivers/ide/ide-tape.c | 26 +++++++++++++------------- include/linux/ide.h | 3 ++- 5 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 1022e421abd..0d4da2c1adc 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(ide_init_pc); * and wait for it to be serviced. */ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc) + struct ide_atapi_pc *pc, unsigned int bufflen) { struct request *rq; int error; @@ -93,8 +93,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->cmd_type = REQ_TYPE_SPECIAL; rq->special = (char *)pc; - if (pc->req_xfer) { - error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + if (bufflen) { + error = blk_rq_map_kern(drive->queue, rq, pc->buf, bufflen, GFP_NOIO); if (error) goto put_req; @@ -117,7 +117,7 @@ int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk) ide_init_pc(&pc); pc.c[0] = TEST_UNIT_READY; - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, 0); } EXPORT_SYMBOL_GPL(ide_do_test_unit_ready); @@ -132,7 +132,7 @@ int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start) if (drive->media == ide_tape) pc.flags |= PC_FLAG_WAIT_FOR_DSC; - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, 0); } EXPORT_SYMBOL_GPL(ide_do_start_stop); @@ -147,7 +147,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on) pc.c[0] = ALLOW_MEDIUM_REMOVAL; pc.c[4] = on; - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, 0); } EXPORT_SYMBOL_GPL(ide_set_media_lock); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a1c55985d4a..5df00d4ab8d 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -318,7 +318,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive, ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); - if (ide_queue_pc_tail(drive, disk, pc)) { + if (ide_queue_pc_tail(drive, disk, pc, pc->req_xfer)) { printk(KERN_ERR PFX "Can't get flexible disk page params\n"); return 1; } @@ -390,7 +390,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) pc.buf = &pc_buf[0]; pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, disk, &pc)) { + if (ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer)) { printk(KERN_ERR PFX "Can't get floppy parameters\n"); return 1; } diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index cd8a42027ed..75f1d50276a 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -50,7 +50,7 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, pc->buf = &pc_buf[0]; pc->buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, floppy->disk, pc)) { + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return -EIO; } @@ -124,7 +124,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc) ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE); pc->flags |= PC_FLAG_SUPPRESS_ERROR; - if (ide_queue_pc_tail(drive, floppy->disk, pc)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) return 1; if (pc->buf[8 + 2] & 0x40) @@ -172,7 +172,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_floppy_get_sfrp_bit(drive, pc); ide_floppy_create_format_unit_cmd(pc, blocks, length, flags); - if (ide_queue_pc_tail(drive, floppy->disk, pc)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) err = -EIO; out: @@ -200,7 +200,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, if (drive->atapi_flags & IDE_AFLAG_SRFP) { ide_create_request_sense_cmd(drive, pc); - if (ide_queue_pc_tail(drive, floppy->disk, pc)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) return -EIO; if (floppy->sense_key == 2 && diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index c9337099797..f09a263b72f 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -770,7 +770,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive) int rc; idetape_create_write_filemark_cmd(drive, &pc, 0); - rc = ide_queue_pc_tail(drive, tape->disk, &pc); + rc = ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer); if (rc) return rc; idetape_wait_ready(drive, 60 * 5 * HZ); @@ -793,7 +793,7 @@ static int idetape_read_position(ide_drive_t *drive) debug_log(DBG_PROCS, "Enter %s\n", __func__); idetape_create_read_position_cmd(&pc); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) return -1; position = tape->first_frame; return position; @@ -846,12 +846,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, __ide_tape_discard_merge_buffer(drive); idetape_wait_ready(drive, 60 * 5 * HZ); idetape_create_locate_cmd(drive, &pc, block, partition, skip); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); if (retval) return (retval); idetape_create_read_position_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); } static void ide_tape_discard_merge_buffer(ide_drive_t *drive, @@ -1047,12 +1047,12 @@ static int idetape_rewind_tape(ide_drive_t *drive) debug_log(DBG_SENSE, "Enter %s\n", __func__); idetape_create_rewind_cmd(drive, &pc); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); if (retval) return retval; idetape_create_read_position_cmd(&pc); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); if (retval) return retval; return 0; @@ -1120,7 +1120,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, case MTBSF: idetape_create_space_cmd(&pc, mt_count - count, IDETAPE_SPACE_OVER_FILEMARK); - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); case MTFSFM: case MTBSFM: if (!sprev) @@ -1259,7 +1259,7 @@ static int idetape_write_filemark(ide_drive_t *drive) /* Write a filemark */ idetape_create_write_filemark_cmd(drive, &pc, 1); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { printk(KERN_ERR "ide-tape: Couldn't write a filemark\n"); return -EIO; } @@ -1344,11 +1344,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK); case MTEOM: idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD); - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); case MTERASE: (void)idetape_rewind_tape(drive); idetape_create_erase_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc); + return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); case MTSETBLK: if (mt_count) { if (mt_count < tape->blk_size || @@ -1457,7 +1457,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive) struct ide_atapi_pc pc; idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { printk(KERN_ERR "ide-tape: Can't get block descriptor\n"); if (tape->blk_size == 0) { printk(KERN_WARNING "ide-tape: Cannot deal with zero " @@ -1613,7 +1613,7 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) pc.buf = &pc_buf[0]; pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); return; @@ -1642,7 +1642,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) u8 speed, max_speed; idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming" " some default values\n"); tape->blk_size = 512; diff --git a/include/linux/ide.h b/include/linux/ide.h index 745a393af27..7e15bd1eaae 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1160,7 +1160,8 @@ enum { REQ_IDETAPE_WRITE = (1 << 3), }; -int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); +int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, + unsigned int); int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); -- cgit v1.2.3-70-g09d2 From b13345f39dadbabdabaf8819cf6df26913da9e8d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 2 May 2009 10:26:12 +0200 Subject: ide-atapi: add a buffer-arg to ide_queue_pc_tail This is in preparation of removing ide_atapi_pc. Expose the buffer as an argument to ide_queue_pc_tail with later replacing it with local buffer or even kmalloc'ed one if needed due to stack usage constraints. Also, add the possibility of passing a NULL-ptr buffer for cmds which don't transfer data besides the cdb. While at it, switch to local buffer in idetape_get_mode_sense_results(). There should be no functional change resulting from this patch. Signed-off-by: Borislav Petkov --- drivers/ide/ide-atapi.c | 12 ++++++------ drivers/ide/ide-floppy.c | 17 ++++++++--------- drivers/ide/ide-floppy_ioctl.c | 16 ++++++++-------- drivers/ide/ide-tape.c | 37 ++++++++++++++++++------------------- include/linux/ide.h | 2 +- 5 files changed, 41 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 0d4da2c1adc..b12be1f17f1 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(ide_init_pc); * and wait for it to be serviced. */ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, unsigned int bufflen) + struct ide_atapi_pc *pc, void *buf, unsigned int bufflen) { struct request *rq; int error; @@ -93,8 +93,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->cmd_type = REQ_TYPE_SPECIAL; rq->special = (char *)pc; - if (bufflen) { - error = blk_rq_map_kern(drive->queue, rq, pc->buf, bufflen, + if (buf && bufflen) { + error = blk_rq_map_kern(drive->queue, rq, buf, bufflen, GFP_NOIO); if (error) goto put_req; @@ -117,7 +117,7 @@ int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk) ide_init_pc(&pc); pc.c[0] = TEST_UNIT_READY; - return ide_queue_pc_tail(drive, disk, &pc, 0); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); } EXPORT_SYMBOL_GPL(ide_do_test_unit_ready); @@ -132,7 +132,7 @@ int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start) if (drive->media == ide_tape) pc.flags |= PC_FLAG_WAIT_FOR_DSC; - return ide_queue_pc_tail(drive, disk, &pc, 0); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); } EXPORT_SYMBOL_GPL(ide_do_start_stop); @@ -147,7 +147,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on) pc.c[0] = ALLOW_MEDIUM_REMOVAL; pc.c[4] = on; - return ide_queue_pc_tail(drive, disk, &pc, 0); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); } EXPORT_SYMBOL_GPL(ide_set_media_lock); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 5df00d4ab8d..be21cf23f8c 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -318,7 +318,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive, ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); - if (ide_queue_pc_tail(drive, disk, pc, pc->req_xfer)) { + if (ide_queue_pc_tail(drive, disk, pc, pc->buf, pc->req_xfer)) { printk(KERN_ERR PFX "Can't get flexible disk page params\n"); return 1; } @@ -387,22 +387,21 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) drive->capacity64 = 0; ide_floppy_create_read_capacity_cmd(&pc); - pc.buf = &pc_buf[0]; pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer)) { + if (ide_queue_pc_tail(drive, disk, &pc, pc_buf, pc.req_xfer)) { printk(KERN_ERR PFX "Can't get floppy parameters\n"); return 1; } - header_len = pc.buf[3]; - cap_desc = &pc.buf[4]; + header_len = pc_buf[3]; + cap_desc = &pc_buf[4]; desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ for (i = 0; i < desc_cnt; i++) { unsigned int desc_start = 4 + i*8; - blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); - length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]); ide_debug_log(IDE_DBG_PROBE, "Descriptor %d: %dkB, %d blocks, " "%d sector size", @@ -415,7 +414,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) * the code below is valid only for the 1st descriptor, ie i=0 */ - switch (pc.buf[desc_start + 4] & 0x03) { + switch (pc_buf[desc_start + 4] & 0x03) { /* Clik! drive returns this instead of CAPACITY_CURRENT */ case CAPACITY_UNFORMATTED: if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) @@ -464,7 +463,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) break; } ide_debug_log(IDE_DBG_PROBE, "Descriptor 0 Code: %d", - pc.buf[desc_start + 4] & 0x03); + pc_buf[desc_start + 4] & 0x03); } /* Clik! disk does not support get_flexible_disk_page */ diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 75f1d50276a..9c2518d7514 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -47,15 +47,14 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, return -EINVAL; ide_floppy_create_read_capacity_cmd(pc); - pc->buf = &pc_buf[0]; pc->buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) { + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc_buf, pc->req_xfer)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return -EIO; } - header_len = pc->buf[3]; + header_len = pc_buf[3]; desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ u_index = 0; @@ -72,8 +71,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, if (u_index >= u_array_size) break; /* User-supplied buffer too small */ - blocks = be32_to_cpup((__be32 *)&pc->buf[desc_start]); - length = be16_to_cpup((__be16 *)&pc->buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]); if (put_user(blocks, argp)) return -EFAULT; @@ -124,7 +123,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc) ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE); pc->flags |= PC_FLAG_SUPPRESS_ERROR; - if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf, pc->req_xfer)) return 1; if (pc->buf[8 + 2] & 0x40) @@ -172,7 +171,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_floppy_get_sfrp_bit(drive, pc); ide_floppy_create_format_unit_cmd(pc, blocks, length, flags); - if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf, pc->req_xfer)) err = -EIO; out: @@ -200,7 +199,8 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, if (drive->atapi_flags & IDE_AFLAG_SRFP) { ide_create_request_sense_cmd(drive, pc); - if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) + if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf, + pc->req_xfer)) return -EIO; if (floppy->sense_key == 2 && diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index f09a263b72f..1f7f50473a4 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -770,7 +770,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive) int rc; idetape_create_write_filemark_cmd(drive, &pc, 0); - rc = ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer); + rc = ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0); if (rc) return rc; idetape_wait_ready(drive, 60 * 5 * HZ); @@ -793,7 +793,7 @@ static int idetape_read_position(ide_drive_t *drive) debug_log(DBG_PROCS, "Enter %s\n", __func__); idetape_create_read_position_cmd(&pc); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.buf, pc.req_xfer)) return -1; position = tape->first_frame; return position; @@ -846,12 +846,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, __ide_tape_discard_merge_buffer(drive); idetape_wait_ready(drive, 60 * 5 * HZ); idetape_create_locate_cmd(drive, &pc, block, partition, skip); - retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + retval = ide_queue_pc_tail(drive, disk, &pc, NULL, 0); if (retval) return (retval); idetape_create_read_position_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + return ide_queue_pc_tail(drive, disk, &pc, pc.buf, pc.req_xfer); } static void ide_tape_discard_merge_buffer(ide_drive_t *drive, @@ -1047,12 +1047,12 @@ static int idetape_rewind_tape(ide_drive_t *drive) debug_log(DBG_SENSE, "Enter %s\n", __func__); idetape_create_rewind_cmd(drive, &pc); - retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + retval = ide_queue_pc_tail(drive, disk, &pc, NULL, 0); if (retval) return retval; idetape_create_read_position_cmd(&pc); - retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + retval = ide_queue_pc_tail(drive, disk, &pc, pc.buf, pc.req_xfer); if (retval) return retval; return 0; @@ -1120,7 +1120,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, case MTBSF: idetape_create_space_cmd(&pc, mt_count - count, IDETAPE_SPACE_OVER_FILEMARK); - return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); case MTFSFM: case MTBSFM: if (!sprev) @@ -1259,7 +1259,7 @@ static int idetape_write_filemark(ide_drive_t *drive) /* Write a filemark */ idetape_create_write_filemark_cmd(drive, &pc, 1); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0)) { printk(KERN_ERR "ide-tape: Couldn't write a filemark\n"); return -EIO; } @@ -1344,11 +1344,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK); case MTEOM: idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD); - return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); case MTERASE: (void)idetape_rewind_tape(drive); idetape_create_erase_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer); + return ide_queue_pc_tail(drive, disk, &pc, NULL, 0); case MTSETBLK: if (mt_count) { if (mt_count < tape->blk_size || @@ -1457,7 +1457,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive) struct ide_atapi_pc pc; idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.buf, pc.req_xfer)) { printk(KERN_ERR "ide-tape: Can't get block descriptor\n"); if (tape->blk_size == 0) { printk(KERN_WARNING "ide-tape: Cannot deal with zero " @@ -1610,17 +1610,16 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); - pc.buf = &pc_buf[0]; pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, pc_buf, pc.req_xfer)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); return; } - memcpy(vendor_id, &pc.buf[8], 8); - memcpy(product_id, &pc.buf[16], 16); - memcpy(fw_rev, &pc.buf[32], 4); + memcpy(vendor_id, &pc_buf[8], 8); + memcpy(product_id, &pc_buf[16], 16); + memcpy(fw_rev, &pc_buf[32], 4); ide_fixstring(vendor_id, 8, 0); ide_fixstring(product_id, 16, 0); @@ -1638,11 +1637,11 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; struct ide_atapi_pc pc; - u8 *caps; + u8 buf[24], *caps; u8 speed, max_speed; idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc, buf, pc.req_xfer)) { printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming" " some default values\n"); tape->blk_size = 512; @@ -1651,7 +1650,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) put_unaligned(6*52, (u16 *)&tape->caps[16]); return; } - caps = pc.buf + 4 + pc.buf[3]; + caps = buf + 4 + buf[3]; /* convert to host order and save for later use */ speed = be16_to_cpup((__be16 *)&caps[14]); diff --git a/include/linux/ide.h b/include/linux/ide.h index 7e15bd1eaae..4cd7157a403 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1161,7 +1161,7 @@ enum { }; int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, - unsigned int); + void *, unsigned int); int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); -- cgit v1.2.3-70-g09d2 From 19f52a784f7ecb5b51cd73cc4514614b600b995a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 May 2009 09:53:03 +0200 Subject: ide-atapi: remove pc->buf Now after all users of pc->buf have been converted, remove the 64B buffer embedded in each packet command. There should be no functional change resulting from this patch. Signed-off-by: Borislav Petkov --- drivers/ide/ide-atapi.c | 6 ------ drivers/ide/ide-floppy.c | 8 +------- drivers/ide/ide-floppy_ioctl.c | 1 - drivers/ide/ide-tape.c | 7 ++----- include/linux/ide.h | 11 ----------- 5 files changed, 3 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 66ea1e7774f..3075b041466 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -74,8 +74,6 @@ EXPORT_SYMBOL_GPL(ide_check_atapi_device); void ide_init_pc(struct ide_atapi_pc *pc) { memset(pc, 0, sizeof(*pc)); - pc->buf = pc->pc_buf; - pc->buf_size = IDE_PC_BUFFER_SIZE; } EXPORT_SYMBOL_GPL(ide_init_pc); @@ -254,10 +252,6 @@ void ide_retry_pc(ide_drive_t *drive) ide_init_pc(pc); memcpy(pc->c, sense_rq->cmd, 12); - /* pointer to mapped address */ - pc->buf = bio_data(sense_rq->bio); - pc->req_xfer = blk_rq_bytes(sense_rq); - if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 14e5e9ca2ad..800c83a9db8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -210,8 +210,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, pc->rq = rq; if (rq->cmd_flags & REQ_RW) pc->flags |= PC_FLAG_WRITING; - pc->buf = NULL; - pc->buf_size = blk_rq_bytes(rq); + pc->flags |= PC_FLAG_DMA_OK; } @@ -226,9 +225,6 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, if (rq_data_dir(rq) == WRITE) pc->flags |= PC_FLAG_WRITING; } - /* pio will be performed by ide_pio_bytes() which handles sg fine */ - pc->buf = NULL; - pc->buf_size = blk_rq_bytes(rq); } static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, @@ -388,8 +384,6 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) drive->capacity64 = 0; ide_floppy_create_read_capacity_cmd(&pc); - pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, disk, &pc, pc_buf, pc.req_xfer)) { printk(KERN_ERR PFX "Can't get floppy parameters\n"); return 1; diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 3a1f9b50b3e..9c2288234de 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -47,7 +47,6 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, return -EINVAL; ide_floppy_create_read_capacity_cmd(pc); - pc->buf_size = sizeof(pc_buf); if (ide_queue_pc_tail(drive, floppy->disk, pc, pc_buf, pc->req_xfer)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index ead2734bc71..9ca2665faf3 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -568,9 +568,8 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->buf = NULL; - pc->buf_size = blk_rq_bytes(rq); - if (pc->buf_size == tape->buffer_size) + + if (blk_rq_bytes(rq) == tape->buffer_size) pc->flags |= PC_FLAG_DMA_OK; if (opcode == READ_6) @@ -1608,8 +1607,6 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); - pc.buf_size = sizeof(pc_buf); - if (ide_queue_pc_tail(drive, tape->disk, &pc, pc_buf, pc.req_xfer)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); diff --git a/include/linux/ide.h b/include/linux/ide.h index 4cd7157a403..59aedcd7fae 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -341,11 +341,6 @@ enum { PC_FLAG_WRITING = (1 << 6), }; -/* - * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. - * This is used for several packet commands (not for READ/WRITE commands). - */ -#define IDE_PC_BUFFER_SIZE 64 #define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { @@ -358,10 +353,6 @@ struct ide_atapi_pc { /* bytes to transfer */ int req_xfer; - /* data buffer */ - u8 *buf; - int buf_size; - /* the corresponding request */ struct request *rq; @@ -371,8 +362,6 @@ struct ide_atapi_pc { * those are more or less driver-specific and some of them are subject * to change/removal later. */ - u8 pc_buf[IDE_PC_BUFFER_SIZE]; - unsigned long timeout; }; -- cgit v1.2.3-70-g09d2 From 103f7033bd0f7b65ff3e0a5ea72449d08010b031 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 26 Apr 2009 10:39:07 +0200 Subject: ide: unify interrupt reason checking Add ide_check_ireason() function that handles all ATAPI devices. Reorganize all unlikely cases in ireason checking further down in the code path. In addition, add PFX for printks originating from ide-atapi. Finally, remove ide_cd_check_ireason. Signed-off-by: Borislav Petkov --- drivers/ide/ide-atapi.c | 95 +++++++++++++++++++++++++++++++++++-------------- drivers/ide/ide-cd.c | 47 +----------------------- include/linux/ide.h | 2 ++ 3 files changed, 71 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 3075b041466..1125ce29809 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -10,6 +10,9 @@ #include +#define DRV_NAME "ide-atapi" +#define PFX DRV_NAME ": " + #ifdef DEBUG #define debug_log(fmt, args...) \ printk(KERN_INFO "ide: " fmt, ## args) @@ -197,8 +200,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) GFP_NOIO); if (unlikely(err)) { if (printk_ratelimit()) - printk(KERN_WARNING "%s: failed to map sense buffer\n", - drive->name); + printk(KERN_WARNING PFX "%s: failed to map sense " + "buffer\n", drive->name); return; } @@ -219,7 +222,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) { /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { - printk(KERN_WARNING "%s: failed queue sense request\n", + printk(KERN_WARNING PFX "%s: error queuing a sense request\n", drive->name); return -ENOMEM; } @@ -292,7 +295,7 @@ int ide_cd_expiry(ide_drive_t *drive) break; default: if (!(rq->cmd_flags & REQ_QUIET)) - printk(KERN_INFO "cmd 0x%x timed out\n", + printk(KERN_INFO PFX "cmd 0x%x timed out\n", rq->cmd[0]); wait = 0; break; @@ -325,6 +328,55 @@ void ide_read_bcount_and_ireason(ide_drive_t *drive, u16 *bcount, u8 *ireason) } EXPORT_SYMBOL_GPL(ide_read_bcount_and_ireason); +/* + * Check the contents of the interrupt reason register and attempt to recover if + * there are problems. + * + * Returns: + * - 0 if everything's ok + * - 1 if the request has to be terminated. + */ +int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len, + int ireason, int rw) +{ + ide_hwif_t *hwif = drive->hwif; + + debug_log("ireason: 0x%x, rw: 0x%x\n", ireason, rw); + + if (ireason == (!rw << 1)) + return 0; + else if (ireason == (rw << 1)) { + printk(KERN_ERR PFX "%s: %s: wrong transfer direction!\n", + drive->name, __func__); + + if (dev_is_idecd(drive)) + ide_pad_transfer(drive, rw, len); + } else if (!rw && ireason == ATAPI_COD) { + if (dev_is_idecd(drive)) { + /* + * Some drives (ASUS) seem to tell us that status info + * is available. Just get it and ignore. + */ + (void)hwif->tp_ops->read_status(hwif); + return 0; + } + } else { + if (ireason & ATAPI_COD) + printk(KERN_ERR PFX "%s: CoD != 0 in %s\n", drive->name, + __func__); + + /* drive wants a command packet, or invalid ireason... */ + printk(KERN_ERR PFX "%s: %s: bad interrupt reason 0x%02x\n", + drive->name, __func__, ireason); + } + + if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC) + rq->cmd_flags |= REQ_FAILED; + + return 1; +} +EXPORT_SYMBOL_GPL(ide_check_ireason); + /* * This is the usual interrupt handler which will be called during a packet * command. We will transfer some of the data (as requested by the drive) @@ -359,7 +411,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) { if (drive->media == ide_floppy) - printk(KERN_ERR "%s: DMA %s error\n", + printk(KERN_ERR PFX "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; @@ -391,8 +443,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) pc->rq->errors++; if (rq->cmd[0] == REQUEST_SENSE) { - printk(KERN_ERR "%s: I/O error in request sense" - " command\n", drive->name); + printk(KERN_ERR PFX "%s: I/O error in request " + "sense command\n", drive->name); return ide_do_reset(drive); } @@ -434,8 +486,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS; - printk(KERN_ERR "%s: The device wants to issue more interrupts " - "in DMA mode\n", drive->name); + printk(KERN_ERR PFX "%s: The device wants to issue more " + "interrupts in DMA mode\n", drive->name); ide_dma_off(drive); return ide_do_reset(drive); } @@ -443,19 +495,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* Get the number of bytes to transfer on this interrupt. */ ide_read_bcount_and_ireason(drive, &bcount, &ireason); - if (ireason & ATAPI_COD) { - printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__); + if (ide_check_ireason(drive, rq, bcount, ireason, write)) return ide_do_reset(drive); - } - - if (((ireason & ATAPI_IO) == ATAPI_IO) == write) { - /* Hopefully, we will never get here */ - printk(KERN_ERR "%s: We wanted to %s, but the device wants us " - "to %s!\n", drive->name, - (ireason & ATAPI_IO) ? "Write" : "Read", - (ireason & ATAPI_IO) ? "Read" : "Write"); - return ide_do_reset(drive); - } done = min_t(unsigned int, bcount, cmd->nleft); ide_pio_bytes(drive, cmd, write, done); @@ -503,13 +544,13 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason) while (retries-- && ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO))) { - printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " + printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing " "a packet command, retrying\n", drive->name); udelay(100); ireason = ide_read_ireason(drive); if (retries == 0) { - printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " - "a packet command, ignoring\n", + printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing" + " a packet command, ignoring\n", drive->name); ireason |= ATAPI_COD; ireason &= ~ATAPI_IO; @@ -540,7 +581,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) u8 ireason; if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) { - printk(KERN_ERR "%s: Strange, packet command initiated yet " + printk(KERN_ERR PFX "%s: Strange, packet command initiated yet " "DRQ isn't asserted\n", drive->name); return startstop; } @@ -582,8 +623,8 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) ireason = ide_wait_ireason(drive, ireason); if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) { - printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " - "a packet command\n", drive->name); + printk(KERN_ERR PFX "%s: (IO,CoD) != (0,1) while " + "issuing a packet command\n", drive->name); return ide_do_reset(drive); } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index dca41ae0d04..d299713bfdc 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -410,50 +410,6 @@ end_request: return 2; } -/* - * Check the contents of the interrupt reason register from the cdrom - * and attempt to recover if there are problems. Returns 0 if everything's - * ok; nonzero if the request has been terminated. - */ -static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, - int len, int ireason, int rw) -{ - ide_hwif_t *hwif = drive->hwif; - - ide_debug_log(IDE_DBG_FUNC, "ireason: 0x%x, rw: 0x%x", ireason, rw); - - /* - * ireason == 0: the drive wants to receive data from us - * ireason == 2: the drive is expecting to transfer data to us - */ - if (ireason == (!rw << 1)) - return 0; - else if (ireason == (rw << 1)) { - - /* whoops... */ - printk(KERN_ERR PFX "%s: %s: wrong transfer direction!\n", - drive->name, __func__); - - ide_pad_transfer(drive, rw, len); - } else if (rw == 0 && ireason == 1) { - /* - * Some drives (ASUS) seem to tell us that status info is - * available. Just get it and ignore. - */ - (void)hwif->tp_ops->read_status(hwif); - return 0; - } else { - /* drive wants a command packet, or invalid ireason... */ - printk(KERN_ERR PFX "%s: %s: bad interrupt reason 0x%02x\n", - drive->name, __func__, ireason); - } - - if (rq->cmd_type == REQ_TYPE_ATA_PC) - rq->cmd_flags |= REQ_FAILED; - - return -1; -} - static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) { struct request *rq = cmd->rq; @@ -645,8 +601,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) goto out_end; } - /* check which way to transfer data */ - rc = ide_cd_check_ireason(drive, rq, len, ireason, write); + rc = ide_check_ireason(drive, rq, len, ireason, write); if (rc) goto out_end; diff --git a/include/linux/ide.h b/include/linux/ide.h index 59aedcd7fae..70a2c94d668 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1125,6 +1125,8 @@ void SELECT_MASK(ide_drive_t *, int); u8 ide_read_error(ide_drive_t *); void ide_read_bcount_and_ireason(ide_drive_t *, u16 *, u8 *); +int ide_check_ireason(ide_drive_t *, struct request *, int, int, int); + int ide_check_atapi_device(ide_drive_t *, const char *); void ide_init_pc(struct ide_atapi_pc *); -- cgit v1.2.3-70-g09d2 From 9e35ad388bea89f7d6f375af4c0ae98803688666 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 13 May 2009 16:21:38 +0200 Subject: perf_counter: Rework the perf counter disable/enable The current disable/enable mechanism is: token = hw_perf_save_disable(); ... /* do bits */ ... hw_perf_restore(token); This works well, provided that the use nests properly. Except we don't. x86 NMI/INT throttling has non-nested use of this, breaking things. Therefore provide a reference counter disable/enable interface, where the first disable disables the hardware, and the last enable enables the hardware again. [ Impact: refactor, simplify the PMU disable/enable logic ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 24 ++++---- arch/x86/kernel/cpu/perf_counter.c | 113 ++++++++++++++----------------------- drivers/acpi/processor_idle.c | 6 +- include/linux/perf_counter.h | 10 ++-- kernel/perf_counter.c | 76 +++++++++++++++---------- 5 files changed, 109 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 15cdc8e6722..bb1b463c136 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -386,7 +386,7 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) * Disable all counters to prevent PMU interrupts and to allow * counters to be added or removed. */ -u64 hw_perf_save_disable(void) +void hw_perf_disable(void) { struct cpu_hw_counters *cpuhw; unsigned long ret; @@ -428,7 +428,6 @@ u64 hw_perf_save_disable(void) mb(); } local_irq_restore(flags); - return ret; } /* @@ -436,7 +435,7 @@ u64 hw_perf_save_disable(void) * If we were previously disabled and counters were added, then * put the new config on the PMU. */ -void hw_perf_restore(u64 disable) +void hw_perf_enable(void) { struct perf_counter *counter; struct cpu_hw_counters *cpuhw; @@ -448,9 +447,12 @@ void hw_perf_restore(u64 disable) int n_lim; int idx; - if (disable) - return; local_irq_save(flags); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw = &__get_cpu_var(cpu_hw_counters); cpuhw->disabled = 0; @@ -649,19 +651,18 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, /* * Add a counter to the PMU. * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_restore to do the + * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ static int power_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; unsigned long flags; - u64 pmudis; int n0; int ret = -EAGAIN; local_irq_save(flags); - pmudis = hw_perf_save_disable(); + perf_disable(); /* * Add the counter to the list (if there is room) @@ -685,7 +686,7 @@ static int power_pmu_enable(struct perf_counter *counter) ret = 0; out: - hw_perf_restore(pmudis); + perf_enable(); local_irq_restore(flags); return ret; } @@ -697,11 +698,10 @@ static void power_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; long i; - u64 pmudis; unsigned long flags; local_irq_save(flags); - pmudis = hw_perf_save_disable(); + perf_disable(); power_pmu_read(counter); @@ -735,7 +735,7 @@ static void power_pmu_disable(struct perf_counter *counter) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - hw_perf_restore(pmudis); + perf_enable(); local_irq_restore(flags); } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7601c014f8f..313638cecbb 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -31,7 +31,6 @@ struct cpu_hw_counters { unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; - u64 throttle_ctrl; int enabled; }; @@ -42,8 +41,8 @@ struct x86_pmu { const char *name; int version; int (*handle_irq)(struct pt_regs *, int); - u64 (*save_disable_all)(void); - void (*restore_all)(u64); + void (*disable_all)(void); + void (*enable_all)(void); void (*enable)(struct hw_perf_counter *, int); void (*disable)(struct hw_perf_counter *, int); unsigned eventsel; @@ -56,6 +55,7 @@ struct x86_pmu { int counter_bits; u64 counter_mask; u64 max_period; + u64 intel_ctrl; }; static struct x86_pmu x86_pmu __read_mostly; @@ -311,22 +311,19 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return 0; } -static u64 intel_pmu_save_disable_all(void) +static void intel_pmu_disable_all(void) { - u64 ctrl; - - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - return ctrl; } -static u64 amd_pmu_save_disable_all(void) +static void amd_pmu_disable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int enabled, idx; + int idx; + + if (!cpuc->enabled) + return; - enabled = cpuc->enabled; cpuc->enabled = 0; /* * ensure we write the disable before we start disabling the @@ -334,8 +331,6 @@ static u64 amd_pmu_save_disable_all(void) * right thing. */ barrier(); - if (!enabled) - goto out; for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; @@ -348,37 +343,31 @@ static u64 amd_pmu_save_disable_all(void) val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } - -out: - return enabled; } -u64 hw_perf_save_disable(void) +void hw_perf_disable(void) { if (!x86_pmu_initialized()) - return 0; - return x86_pmu.save_disable_all(); + return; + return x86_pmu.disable_all(); } -/* - * Exported because of ACPI idle - */ -EXPORT_SYMBOL_GPL(hw_perf_save_disable); -static void intel_pmu_restore_all(u64 ctrl) +static void intel_pmu_enable_all(void) { - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); } -static void amd_pmu_restore_all(u64 ctrl) +static void amd_pmu_enable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); int idx; - cpuc->enabled = ctrl; - barrier(); - if (!ctrl) + if (cpuc->enabled) return; + cpuc->enabled = 1; + barrier(); + for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; @@ -392,16 +381,12 @@ static void amd_pmu_restore_all(u64 ctrl) } } -void hw_perf_restore(u64 ctrl) +void hw_perf_enable(void) { if (!x86_pmu_initialized()) return; - x86_pmu.restore_all(ctrl); + x86_pmu.enable_all(); } -/* - * Exported because of ACPI idle - */ -EXPORT_SYMBOL_GPL(hw_perf_restore); static inline u64 intel_pmu_get_status(void) { @@ -735,15 +720,14 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) int bit, cpu = smp_processor_id(); u64 ack, status; struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); - int ret = 0; - - cpuc->throttle_ctrl = intel_pmu_save_disable_all(); + perf_disable(); status = intel_pmu_get_status(); - if (!status) - goto out; + if (!status) { + perf_enable(); + return 0; + } - ret = 1; again: inc_irq_stat(apic_perf_irqs); ack = status; @@ -767,19 +751,11 @@ again: status = intel_pmu_get_status(); if (status) goto again; -out: - /* - * Restore - do not reenable when global enable is off or throttled: - */ - if (cpuc->throttle_ctrl) { - if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) { - intel_pmu_restore_all(cpuc->throttle_ctrl); - } else { - pr_info("CPU#%d: perfcounters: max interrupt rate exceeded! Throttle on.\n", smp_processor_id()); - } - } - return ret; + if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS) + perf_enable(); + + return 1; } static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) @@ -792,13 +768,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) struct hw_perf_counter *hwc; int idx, throttle = 0; - cpuc->throttle_ctrl = cpuc->enabled; - cpuc->enabled = 0; - barrier(); - - if (cpuc->throttle_ctrl) { - if (++cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) - throttle = 1; + if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) { + throttle = 1; + __perf_disable(); + cpuc->enabled = 0; + barrier(); } for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -824,9 +798,6 @@ next: amd_pmu_disable_counter(hwc, idx); } - if (cpuc->throttle_ctrl && !throttle) - cpuc->enabled = 1; - return handled; } @@ -839,13 +810,11 @@ void perf_counter_unthrottle(void) cpuc = &__get_cpu_var(cpu_hw_counters); if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { - pr_info("CPU#%d: perfcounters: throttle off.\n", smp_processor_id()); - /* * Clear them before re-enabling irqs/NMIs again: */ cpuc->interrupts = 0; - hw_perf_restore(cpuc->throttle_ctrl); + perf_enable(); } else { cpuc->interrupts = 0; } @@ -931,8 +900,8 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, - .save_disable_all = intel_pmu_save_disable_all, - .restore_all = intel_pmu_restore_all, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_counter, .disable = intel_pmu_disable_counter, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -951,8 +920,8 @@ static struct x86_pmu intel_pmu = { static struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, - .save_disable_all = amd_pmu_save_disable_all, - .restore_all = amd_pmu_restore_all, + .disable_all = amd_pmu_disable_all, + .enable_all = amd_pmu_enable_all, .enable = amd_pmu_enable_counter, .disable = amd_pmu_disable_counter, .eventsel = MSR_K7_EVNTSEL0, @@ -1003,6 +972,8 @@ static int intel_pmu_init(void) x86_pmu.counter_bits = eax.split.bit_width; x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + return 0; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d2830f39d46..9645758c047 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -763,11 +763,9 @@ static int acpi_idle_bm_check(void) */ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) { - u64 perf_flags; - /* Don't trace irqs off for idle */ stop_critical_timings(); - perf_flags = hw_perf_save_disable(); + perf_disable(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -782,7 +780,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } - hw_perf_restore(perf_flags); + perf_enable(); start_critical_timings(); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 614f921d616..e543ecc129f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -544,8 +544,10 @@ extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); -extern u64 hw_perf_save_disable(void); -extern void hw_perf_restore(u64 ctrl); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); extern int hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -600,8 +602,8 @@ static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } -static inline void hw_perf_restore(u64 ctrl) { } -static inline u64 hw_perf_save_disable(void) { return 0; } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 985be0b662a..e814ff04d7c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -60,8 +60,9 @@ extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counte return NULL; } -u64 __weak hw_perf_save_disable(void) { return 0; } -void __weak hw_perf_restore(u64 ctrl) { barrier(); } +void __weak hw_perf_disable(void) { barrier(); } +void __weak hw_perf_enable(void) { barrier(); } + void __weak hw_perf_counter_setup(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, @@ -72,6 +73,32 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, void __weak perf_counter_print_debug(void) { } +static DEFINE_PER_CPU(int, disable_count); + +void __perf_disable(void) +{ + __get_cpu_var(disable_count)++; +} + +bool __perf_enable(void) +{ + return !--__get_cpu_var(disable_count); +} + +void perf_disable(void) +{ + __perf_disable(); + hw_perf_disable(); +} +EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */ + +void perf_enable(void) +{ + if (__perf_enable()) + hw_perf_enable(); +} +EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */ + static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -170,7 +197,6 @@ static void __perf_counter_remove_from_context(void *info) struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; unsigned long flags; - u64 perf_flags; /* * If this is a task context, we need to check whether it is @@ -191,9 +217,9 @@ static void __perf_counter_remove_from_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_save_disable(); + perf_disable(); list_del_counter(counter, ctx); - hw_perf_restore(perf_flags); + perf_enable(); if (!ctx->task) { /* @@ -538,7 +564,6 @@ static void __perf_install_in_context(void *info) struct perf_counter *leader = counter->group_leader; int cpu = smp_processor_id(); unsigned long flags; - u64 perf_flags; int err; /* @@ -556,7 +581,7 @@ static void __perf_install_in_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_save_disable(); + perf_disable(); add_counter_to_ctx(counter, ctx); @@ -596,7 +621,7 @@ static void __perf_install_in_context(void *info) cpuctx->max_pertask--; unlock: - hw_perf_restore(perf_flags); + perf_enable(); spin_unlock_irqrestore(&ctx->lock, flags); } @@ -663,7 +688,6 @@ static void __perf_counter_enable(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter_context *ctx = counter->ctx; struct perf_counter *leader = counter->group_leader; - unsigned long pmuflags; unsigned long flags; int err; @@ -693,14 +717,14 @@ static void __perf_counter_enable(void *info) if (!group_can_go_on(counter, cpuctx, 1)) { err = -EEXIST; } else { - pmuflags = hw_perf_save_disable(); + perf_disable(); if (counter == leader) err = group_sched_in(counter, cpuctx, ctx, smp_processor_id()); else err = counter_sched_in(counter, cpuctx, ctx, smp_processor_id()); - hw_perf_restore(pmuflags); + perf_enable(); } if (err) { @@ -795,7 +819,6 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx) { struct perf_counter *counter; - u64 flags; spin_lock(&ctx->lock); ctx->is_active = 0; @@ -803,12 +826,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, goto out; update_context_time(ctx); - flags = hw_perf_save_disable(); + perf_disable(); if (ctx->nr_active) { list_for_each_entry(counter, &ctx->counter_list, list_entry) group_sched_out(counter, cpuctx, ctx); } - hw_perf_restore(flags); + perf_enable(); out: spin_unlock(&ctx->lock); } @@ -860,7 +883,6 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) { struct perf_counter *counter; - u64 flags; int can_add_hw = 1; spin_lock(&ctx->lock); @@ -870,7 +892,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, ctx->timestamp = perf_clock(); - flags = hw_perf_save_disable(); + perf_disable(); /* * First go through the list and put on any pinned groups @@ -917,7 +939,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, can_add_hw = 0; } } - hw_perf_restore(flags); + perf_enable(); out: spin_unlock(&ctx->lock); } @@ -955,7 +977,6 @@ int perf_counter_task_disable(void) struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; unsigned long flags; - u64 perf_flags; if (likely(!ctx->nr_counters)) return 0; @@ -969,7 +990,7 @@ int perf_counter_task_disable(void) /* * Disable all the counters: */ - perf_flags = hw_perf_save_disable(); + perf_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (counter->state != PERF_COUNTER_STATE_ERROR) { @@ -978,7 +999,7 @@ int perf_counter_task_disable(void) } } - hw_perf_restore(perf_flags); + perf_enable(); spin_unlock_irqrestore(&ctx->lock, flags); @@ -991,7 +1012,6 @@ int perf_counter_task_enable(void) struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; unsigned long flags; - u64 perf_flags; int cpu; if (likely(!ctx->nr_counters)) @@ -1007,7 +1027,7 @@ int perf_counter_task_enable(void) /* * Disable all the counters: */ - perf_flags = hw_perf_save_disable(); + perf_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (counter->state > PERF_COUNTER_STATE_OFF) @@ -1017,7 +1037,7 @@ int perf_counter_task_enable(void) ctx->time - counter->total_time_enabled; counter->hw_event.disabled = 0; } - hw_perf_restore(perf_flags); + perf_enable(); spin_unlock(&ctx->lock); @@ -1034,7 +1054,6 @@ int perf_counter_task_enable(void) static void rotate_ctx(struct perf_counter_context *ctx) { struct perf_counter *counter; - u64 perf_flags; if (!ctx->nr_counters) return; @@ -1043,12 +1062,12 @@ static void rotate_ctx(struct perf_counter_context *ctx) /* * Rotate the first entry last (works just fine for group counters too): */ - perf_flags = hw_perf_save_disable(); + perf_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { list_move_tail(&counter->list_entry, &ctx->counter_list); break; } - hw_perf_restore(perf_flags); + perf_enable(); spin_unlock(&ctx->lock); } @@ -3194,7 +3213,6 @@ __perf_counter_exit_task(struct task_struct *child, } else { struct perf_cpu_context *cpuctx; unsigned long flags; - u64 perf_flags; /* * Disable and unlink this counter. @@ -3203,7 +3221,7 @@ __perf_counter_exit_task(struct task_struct *child, * could still be processing it: */ local_irq_save(flags); - perf_flags = hw_perf_save_disable(); + perf_disable(); cpuctx = &__get_cpu_var(perf_cpu_context); @@ -3214,7 +3232,7 @@ __perf_counter_exit_task(struct task_struct *child, child_ctx->nr_counters--; - hw_perf_restore(perf_flags); + perf_enable(); local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From 789f90fcf6b0b54e655740e9396c954378542c79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 May 2009 15:19:27 +0200 Subject: perf_counter: per user mlock gift Instead of a per-process mlock gift for perf-counters, use a per-user gift so that there is less of a DoS potential. [ Impact: allow less worst-case unprivileged memory consumption ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090515132018.496182835@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/perf_counter.c | 22 +++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d1857580a13..ff59d123151 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -674,6 +674,10 @@ struct user_struct { struct work_struct work; #endif #endif + +#ifdef CONFIG_PERF_COUNTERS + atomic_long_t locked_vm; +#endif }; extern int uids_sysfs_init(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0173738dd54..93f4a0e4b87 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __read_mostly; static atomic_t nr_comm_tracking __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ -int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */ +int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ /* * Lock for (sysadmin-configurable) counter reservations: @@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_area_struct *vma) if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { + struct user_struct *user = current_user(); + + atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); vma->vm_mm->locked_vm -= counter->data->nr_locked; perf_mmap_data_free(counter); mutex_unlock(&counter->mmap_mutex); @@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_mmap_vmops = { static int perf_mmap(struct file *file, struct vm_area_struct *vma) { struct perf_counter *counter = file->private_data; + struct user_struct *user = current_user(); unsigned long vma_size; unsigned long nr_pages; + unsigned long user_locked, user_lock_limit; unsigned long locked, lock_limit; + long user_extra, extra; int ret = 0; - long extra; if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) return -EINVAL; @@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; } - extra = nr_pages /* + 1 only account the data pages */; - extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); - if (extra < 0) - extra = 0; + user_extra = nr_pages + 1; + user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); + user_locked = atomic_long_read(&user->locked_vm) + user_extra; - locked = vma->vm_mm->locked_vm + extra; + extra = 0; + if (user_locked > user_lock_limit) + extra = user_locked - user_lock_limit; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; + locked = vma->vm_mm->locked_vm + extra; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { ret = -EPERM; @@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; atomic_set(&counter->mmap_count, 1); + atomic_long_add(user_extra, &user->locked_vm); vma->vm_mm->locked_vm += extra; counter->data->nr_locked = extra; unlock: -- cgit v1.2.3-70-g09d2 From 60db5e09c13109b13830cc9dcae688003fd39e79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 May 2009 15:19:28 +0200 Subject: perf_counter: frequency based adaptive irq_period Instead of specifying the irq_period for a counter, provide a target interrupt frequency and dynamically adapt the irq_period to match this frequency. [ Impact: new perf-counter attribute/feature ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090515132018.646195868@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 13 ++++---- arch/x86/kernel/cpu/perf_counter.c | 9 ++---- include/linux/perf_counter.h | 10 ++++-- kernel/perf_counter.c | 63 ++++++++++++++++++++++++++++++-------- 4 files changed, 68 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb1b463c136..db8d5cafc15 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -534,7 +534,7 @@ void hw_perf_enable(void) continue; } val = 0; - if (counter->hw_event.irq_period) { + if (counter->hw.irq_period) { left = atomic64_read(&counter->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; @@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if ((s64)counter->hw_event.irq_period < 0) - return ERR_PTR(-EINVAL); if (!perf_event_raw(&counter->hw_event)) { ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) @@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + atomic64_set(&counter->hw.period_left, counter->hw.irq_period); /* * See if we need to reserve the PMU. @@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) static void record_and_restart(struct perf_counter *counter, long val, struct pt_regs *regs, int nmi) { + u64 period = counter->hw.irq_period; s64 prev, delta, left; int record = 0; @@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val, */ val = 0; left = atomic64_read(&counter->hw.period_left) - delta; - if (counter->hw_event.irq_period) { + if (period) { if (left <= 0) { - left += counter->hw_event.irq_period; + left += period; if (left <= 0) - left = counter->hw_event.irq_period; + left = period; record = 1; } if (left < 0x80000000L) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 5a7f718eb1e..886dcf334bc 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 1; } - hwc->irq_period = hw_event->irq_period; - if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period) - hwc->irq_period = x86_pmu.max_period; - - atomic64_set(&hwc->period_left, hwc->irq_period); + atomic64_set(&hwc->period_left, + min(x86_pmu.max_period, hwc->irq_period)); /* * Raw event type provide the config in the event structure @@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->irq_period; + s64 period = min(x86_pmu.max_period, hwc->irq_period); int err; /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e543ecc129f..004b6e162b9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -130,7 +130,11 @@ struct perf_counter_hw_event { */ __u64 config; - __u64 irq_period; + union { + __u64 irq_period; + __u64 irq_freq; + }; + __u32 record_type; __u32 read_format; @@ -146,8 +150,9 @@ struct perf_counter_hw_event { mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ @@ -337,6 +342,7 @@ struct hw_perf_counter { atomic64_t prev_count; u64 irq_period; atomic64_t period_left; + u64 interrupts; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 93f4a0e4b87..0ad1db4f3d6 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void) return 0; } +void perf_adjust_freq(struct perf_counter_context *ctx) +{ + struct perf_counter *counter; + u64 irq_period; + u64 events, period; + s64 delta; + + spin_lock(&ctx->lock); + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + continue; + + if (!counter->hw_event.freq || !counter->hw_event.irq_freq) + continue; + + events = HZ * counter->hw.interrupts * counter->hw.irq_period; + period = div64_u64(events, counter->hw_event.irq_freq); + + delta = (s64)(1 + period - counter->hw.irq_period); + delta >>= 1; + + irq_period = counter->hw.irq_period + delta; + + if (!irq_period) + irq_period = 1; + + counter->hw.irq_period = irq_period; + counter->hw.interrupts = 0; + } + spin_unlock(&ctx->lock); +} + /* * Round-robin a context's counters: */ @@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = &curr->perf_counter_ctx; + perf_adjust_freq(&cpuctx->ctx); + perf_adjust_freq(ctx); + perf_counter_cpu_sched_out(cpuctx); __perf_counter_task_sched_out(ctx); @@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter, int events = atomic_read(&counter->event_limit); int ret = 0; + counter->hw.interrupts++; + /* * XXX event_limit might not quite work as expected on inherited * counters @@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_counter *counter; struct pt_regs *regs; + u64 period; counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); counter->pmu->read(counter); @@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) ret = HRTIMER_NORESTART; } - hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); + period = max_t(u64, 10000, counter->hw.irq_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); return ret; } @@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swcounter_hrtimer; if (hwc->irq_period) { + u64 period = max_t(u64, 10000, hwc->irq_period); __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(hwc->irq_period), 0, + ns_to_ktime(period), 0, HRTIMER_MODE_REL, 0); } @@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swcounter_hrtimer; if (hwc->irq_period) { + u64 period = max_t(u64, 10000, hwc->irq_period); __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(hwc->irq_period), 0, + ns_to_ktime(period), 0, HRTIMER_MODE_REL, 0); } @@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) { - struct perf_counter_hw_event *hw_event = &counter->hw_event; const struct pmu *pmu = NULL; - struct hw_perf_counter *hwc = &counter->hw; /* * Software counters (currently) can't in general distinguish @@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_CPU_CLOCK: pmu = &perf_ops_cpu_clock; - if (hw_event->irq_period && hw_event->irq_period < 10000) - hw_event->irq_period = 10000; break; case PERF_COUNT_TASK_CLOCK: /* @@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) else pmu = &perf_ops_cpu_clock; - if (hw_event->irq_period && hw_event->irq_period < 10000) - hw_event->irq_period = 10000; break; case PERF_COUNT_PAGE_FAULTS: case PERF_COUNT_PAGE_FAULTS_MIN: @@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) break; } - if (pmu) - hwc->irq_period = hw_event->irq_period; - return pmu; } @@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, { const struct pmu *pmu; struct perf_counter *counter; + struct hw_perf_counter *hwc; long err; counter = kzalloc(sizeof(*counter), gfpflags); @@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, pmu = NULL; + hwc = &counter->hw; + if (hw_event->freq && hw_event->irq_freq) + hwc->irq_period = TICK_NSEC / hw_event->irq_freq; + else + hwc->irq_period = hw_event->irq_period; + /* * we currently do not support PERF_RECORD_GROUP on inherited counters */ -- cgit v1.2.3-70-g09d2 From dce48a84adf1806676319f6f480e30a6daa012f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 11 Apr 2009 10:43:41 +0200 Subject: sched, timers: move calc_load() to scheduler Dimitri Sivanich noticed that xtime_lock is held write locked across calc_load() which iterates over all online CPUs. That can cause long latencies for xtime_lock readers on large SMP systems. The load average calculation is an rough estimate anyway so there is no real need to protect the readers vs. the update. It's not a problem when the avenrun array is updated while a reader copies the values. Instead of iterating over all online CPUs let the scheduler_tick code update the number of active tasks shortly before the avenrun update happens. The avenrun update itself is handled by the CPU which calls do_timer(). [ Impact: reduce xtime_lock write locked section ] Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra --- include/linux/sched.h | 2 +- kernel/sched.c | 84 +++++++++++++++++++++++++++++++++++++++++------ kernel/sched_idletask.c | 3 +- kernel/time/timekeeping.c | 2 +- kernel/timer.c | 54 ++---------------------------- 5 files changed, 80 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..6eb4892efe4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); -extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern void calc_global_load(void); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched.c b/kernel/sched.c index 8908d190a34..f4eb88153bd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -630,6 +630,10 @@ struct rq { struct list_head migration_queue; #endif + /* calc_load related fields */ + unsigned long calc_load_update; + long calc_load_active; + #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP int hrtick_csd_pending; @@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) } #endif +static void calc_load_account_active(struct rq *this_rq); + #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" @@ -2856,19 +2862,57 @@ unsigned long nr_iowait(void) return sum; } -unsigned long nr_active(void) +/* Variables and functions for calc_load */ +static atomic_long_t calc_load_tasks; +static unsigned long calc_load_update; +unsigned long avenrun[3]; +EXPORT_SYMBOL(avenrun); + +static unsigned long +calc_load(unsigned long load, unsigned long exp, unsigned long active) { - unsigned long i, running = 0, uninterruptible = 0; + load *= exp; + load += active * (FIXED_1 - exp); + return load >> FSHIFT; +} - for_each_online_cpu(i) { - running += cpu_rq(i)->nr_running; - uninterruptible += cpu_rq(i)->nr_uninterruptible; - } +/* + * calc_load - update the avenrun load estimates 10 ticks after the + * CPUs have updated calc_load_tasks. + */ +void calc_global_load(void) +{ + unsigned long upd = calc_load_update + 10; + long active; + + if (time_before(jiffies, upd)) + return; - if (unlikely((long)uninterruptible < 0)) - uninterruptible = 0; + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - return running + uninterruptible; + avenrun[0] = calc_load(avenrun[0], EXP_1, active); + avenrun[1] = calc_load(avenrun[1], EXP_5, active); + avenrun[2] = calc_load(avenrun[2], EXP_15, active); + + calc_load_update += LOAD_FREQ; +} + +/* + * Either called from update_cpu_load() or from a cpu going idle + */ +static void calc_load_account_active(struct rq *this_rq) +{ + long nr_active, delta; + + nr_active = this_rq->nr_running; + nr_active += (long) this_rq->nr_uninterruptible; + + if (nr_active != this_rq->calc_load_active) { + delta = nr_active - this_rq->calc_load_active; + this_rq->calc_load_active = nr_active; + atomic_long_add(delta, &calc_load_tasks); + } } /* @@ -2899,6 +2943,11 @@ static void update_cpu_load(struct rq *this_rq) new_load += scale-1; this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; } + + if (time_after_eq(jiffies, this_rq->calc_load_update)) { + this_rq->calc_load_update += LOAD_FREQ; + calc_load_account_active(this_rq); + } } #ifdef CONFIG_SMP @@ -7091,6 +7140,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu) } } + +/* + * remove the tasks which were accounted by rq from calc_load_tasks. + */ +static void calc_global_load_remove(struct rq *rq) +{ + atomic_long_sub(rq->calc_load_active, &calc_load_tasks); +} #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -7325,6 +7382,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* Update our root-domain */ rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); + rq->calc_load_update = calc_load_update; + rq->calc_load_active = 0; if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); @@ -7364,7 +7423,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) cpuset_unlock(); migrate_nr_uninterruptible(rq); BUG_ON(rq->nr_running != 0); - + calc_global_load_remove(rq); /* * No need to migrate the tasks: it was best-effort if * they didn't take sched_hotcpu_mutex. Just wake up @@ -9059,6 +9118,8 @@ void __init sched_init(void) rq = cpu_rq(i); spin_lock_init(&rq->lock); rq->nr_running = 0; + rq->calc_load_active = 0; + rq->calc_load_update = jiffies + LOAD_FREQ; init_cfs_rq(&rq->cfs, rq); init_rt_rq(&rq->rt, rq); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -9166,6 +9227,9 @@ void __init sched_init(void) * when this runqueue becomes "idle". */ init_idle(current, smp_processor_id()); + + calc_load_update = jiffies + LOAD_FREQ; + /* * During early bootup we pretend to be a normal task: */ diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 8a21a2e28c1..499672c10cb 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - + /* adjust the active tasks as we might go into a long sleep */ + calc_load_account_active(rq); return rq->idle; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 687dff49f6e..52a8bf8931f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -22,7 +22,7 @@ /* * This read-write spinlock protects us from races in SMP while - * playing with xtime and avenrun. + * playing with xtime. */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); diff --git a/kernel/timer.c b/kernel/timer.c index cffffad01c3..6a21d7af962 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1122,47 +1122,6 @@ void update_process_times(int user_tick) run_posix_cpu_timers(p); } -/* - * Nr of active tasks - counted in fixed-point numbers - */ -static unsigned long count_active_tasks(void) -{ - return nr_active() * FIXED_1; -} - -/* - * Hmm.. Changed this, as the GNU make sources (load.c) seems to - * imply that avenrun[] is the standard name for this kind of thing. - * Nothing else seems to be standardized: the fractional size etc - * all seem to differ on different machines. - * - * Requires xtime_lock to access. - */ -unsigned long avenrun[3]; - -EXPORT_SYMBOL(avenrun); - -/* - * calc_load - given tick count, update the avenrun load estimates. - * This is called while holding a write_lock on xtime_lock. - */ -static inline void calc_load(unsigned long ticks) -{ - unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - if (unlikely(count < 0)) { - active_tasks = count_active_tasks(); - do { - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); - count += LOAD_FREQ; - } while (count < 0); - } -} - /* * This function runs timers and the timer-tq in bottom half context. */ @@ -1186,16 +1145,6 @@ void run_local_timers(void) softlockup_tick(); } -/* - * Called by the timer interrupt. xtime_lock must already be taken - * by the timer IRQ! - */ -static inline void update_times(unsigned long ticks) -{ - update_wall_time(); - calc_load(ticks); -} - /* * The 64-bit jiffies value is not atomic - you MUST NOT read it * without sampling the sequence number in xtime_lock. @@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks) void do_timer(unsigned long ticks) { jiffies_64 += ticks; - update_times(ticks); + update_wall_time(); + calc_global_load(); } #ifdef __ARCH_WANT_SYS_ALARM -- cgit v1.2.3-70-g09d2 From 2d02494f5a90f2e4b3c4c6acc85ec94674cdc431 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 May 2009 20:08:52 +0200 Subject: sched, timers: cleanup avenrun users avenrun is an rough estimate so we don't have to worry about consistency of the three avenrun values. Remove the xtime lock dependency and provide a function to scale the values. Cleanup the users. [ Impact: cleanup ] Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra --- fs/proc/loadavg.c | 18 ++++++------------ include/linux/sched.h | 1 + kernel/sched.c | 15 +++++++++++++++ kernel/timer.c | 32 ++++++-------------------------- 4 files changed, 28 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 9bca39cf99e..1afa4dd4cae 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -12,20 +12,14 @@ static int loadavg_proc_show(struct seq_file *m, void *v) { - int a, b, c; - unsigned long seq; + unsigned long avnrun[3]; - do { - seq = read_seqbegin(&xtime_lock); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - } while (read_seqretry(&xtime_lock, seq)); + get_avenrun(avnrun, FIXED_1/200, 0); - seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", - LOAD_INT(a), LOAD_FRAC(a), - LOAD_INT(b), LOAD_FRAC(b), - LOAD_INT(c), LOAD_FRAC(c), + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), + LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), + LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, task_active_pid_ns(current)->last_pid); return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6eb4892efe4..de7b3b21777 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -116,6 +116,7 @@ struct fs_struct; * 11 bit fractions. */ extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<= 0) { - tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; - tp.tv_sec++; - } - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); - info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); - info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - info->procs = nr_threads; - } while (read_seqretry(&xtime_lock, seq)); + info->procs = nr_threads; si_meminfo(info); si_swapinfo(info); -- cgit v1.2.3-70-g09d2 From 9d23a90a67261e73b2fcac04d8ca963c6b496afb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2009 21:48:08 +1000 Subject: perf_counter: allow arch to supply event misc flags and instruction pointer At present the values we put in overflow events for the misc flags indicating processor mode and the instruction pointer are obtained using the standard user_mode() and instruction_pointer() functions. Those functions tell you where the performance monitor interrupt was taken, which might not be exactly where the counter overflow occurred, for example because interrupts were disabled at the point where the overflow occurred, or because the processor had many instructions in flight and chose to complete some more instructions beyond the one that caused the counter overflow. Some architectures (e.g. powerpc) can supply more precise information about where the counter overflow occurred and the processor mode at that point. This introduces new functions, perf_misc_flags() and perf_instruction_pointer(), which arch code can override to provide more precise information if available. They have default implementations which are identical to the existing code. This also adds a new misc flag value, PERF_EVENT_MISC_HYPERVISOR, for the case where a counter overflow occurred in the hypervisor. We encode the processor mode in the 2 bits previously used to indicate user or kernel mode; the values for user and kernel mode are unchanged and hypervisor mode is indicated by both bits being set. [ Impact: generalize perfcounter core facilities ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18956.1272.818511.561835@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- kernel/perf_counter.c | 5 ++--- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 004b6e162b9..c8c1dfc22c9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -215,8 +215,11 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) +#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_USER (2 << 0) +#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) #define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { @@ -596,6 +599,12 @@ extern int sysctl_perf_counter_mlock; extern void perf_counter_init(void); +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ + PERF_EVENT_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 728a595399b..57840a94b16 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2042,11 +2042,10 @@ static void perf_counter_output(struct perf_counter *counter, header.size = sizeof(header); header.misc = PERF_EVENT_MISC_OVERFLOW; - header.misc |= user_mode(regs) ? - PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; + header.misc |= perf_misc_flags(regs); if (record_type & PERF_RECORD_IP) { - ip = instruction_pointer(regs); + ip = perf_instruction_pointer(regs); header.type |= PERF_RECORD_IP; header.size += sizeof(ip); } -- cgit v1.2.3-70-g09d2 From a48b2d4a0091904b4cf57d667adc2faf689750d3 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 15 May 2009 20:12:47 -0700 Subject: Input: introduce lm8323 keypad driver lm8323 is the keypad driver used in n810 device. [akpm@linux-foundation.org: coding-style fixes] [dtor@mail.ru: various cleanups] Signed-off-by: Felipe Balbi Reviewed-by: Trilok Soni Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 13 +- drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/lm8323.c | 878 ++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/lm8323.h | 46 +++ 4 files changed, 937 insertions(+), 1 deletion(-) create mode 100644 drivers/input/keyboard/lm8323.c create mode 100644 include/linux/i2c/lm8323.h (limited to 'include') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 54775aaa7be..9d8f796c674 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -250,6 +250,17 @@ config KEYBOARD_HP7XX To compile this driver as a module, choose M here: the module will be called jornada720_kbd. +config KEYBOARD_LM8323 + tristate "LM8323 keypad chip" + depends on I2C + depends on LEDS_CLASS + help + If you say yes here you get support for the National Semiconductor + LM8323 keypad controller. + + To compile this driver as a module, choose M here: the + module will be called lm8323. + config KEYBOARD_OMAP tristate "TI OMAP keypad support" depends on (ARCH_OMAP1 || ARCH_OMAP2) @@ -332,7 +343,7 @@ config KEYBOARD_SH_KEYSC To compile this driver as a module, choose M here: the module will be called sh_keysc. -+ + config KEYBOARD_EP93XX tristate "EP93xx Matrix Keypad support" depends on ARCH_EP93XX diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 13ba9c95493..156b647a259 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o obj-$(CONFIG_KEYBOARD_TOSA) += tosakbd.o obj-$(CONFIG_KEYBOARD_HIL) += hil_kbd.o obj-$(CONFIG_KEYBOARD_HIL_OLD) += hilkbd.o +obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c new file mode 100644 index 00000000000..574eda2a495 --- /dev/null +++ b/drivers/input/keyboard/lm8323.c @@ -0,0 +1,878 @@ +/* + * drivers/i2c/chips/lm8323.c + * + * Copyright (C) 2007-2009 Nokia Corporation + * + * Written by Daniel Stone + * Timo O. Karjalainen + * + * Updated by Felipe Balbi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License only). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Commands to send to the chip. */ +#define LM8323_CMD_READ_ID 0x80 /* Read chip ID. */ +#define LM8323_CMD_WRITE_CFG 0x81 /* Set configuration item. */ +#define LM8323_CMD_READ_INT 0x82 /* Get interrupt status. */ +#define LM8323_CMD_RESET 0x83 /* Reset, same as external one */ +#define LM8323_CMD_WRITE_PORT_SEL 0x85 /* Set GPIO in/out. */ +#define LM8323_CMD_WRITE_PORT_STATE 0x86 /* Set GPIO pullup. */ +#define LM8323_CMD_READ_PORT_SEL 0x87 /* Get GPIO in/out. */ +#define LM8323_CMD_READ_PORT_STATE 0x88 /* Get GPIO pullup. */ +#define LM8323_CMD_READ_FIFO 0x89 /* Read byte from FIFO. */ +#define LM8323_CMD_RPT_READ_FIFO 0x8a /* Read FIFO (no increment). */ +#define LM8323_CMD_SET_ACTIVE 0x8b /* Set active time. */ +#define LM8323_CMD_READ_ERR 0x8c /* Get error status. */ +#define LM8323_CMD_READ_ROTATOR 0x8e /* Read rotator status. */ +#define LM8323_CMD_SET_DEBOUNCE 0x8f /* Set debouncing time. */ +#define LM8323_CMD_SET_KEY_SIZE 0x90 /* Set keypad size. */ +#define LM8323_CMD_READ_KEY_SIZE 0x91 /* Get keypad size. */ +#define LM8323_CMD_READ_CFG 0x92 /* Get configuration item. */ +#define LM8323_CMD_WRITE_CLOCK 0x93 /* Set clock config. */ +#define LM8323_CMD_READ_CLOCK 0x94 /* Get clock config. */ +#define LM8323_CMD_PWM_WRITE 0x95 /* Write PWM script. */ +#define LM8323_CMD_START_PWM 0x96 /* Start PWM engine. */ +#define LM8323_CMD_STOP_PWM 0x97 /* Stop PWM engine. */ + +/* Interrupt status. */ +#define INT_KEYPAD 0x01 /* Key event. */ +#define INT_ROTATOR 0x02 /* Rotator event. */ +#define INT_ERROR 0x08 /* Error: use CMD_READ_ERR. */ +#define INT_NOINIT 0x10 /* Lost configuration. */ +#define INT_PWM1 0x20 /* PWM1 stopped. */ +#define INT_PWM2 0x40 /* PWM2 stopped. */ +#define INT_PWM3 0x80 /* PWM3 stopped. */ + +/* Errors (signalled by INT_ERROR, read with CMD_READ_ERR). */ +#define ERR_BADPAR 0x01 /* Bad parameter. */ +#define ERR_CMDUNK 0x02 /* Unknown command. */ +#define ERR_KEYOVR 0x04 /* Too many keys pressed. */ +#define ERR_FIFOOVER 0x40 /* FIFO overflow. */ + +/* Configuration keys (CMD_{WRITE,READ}_CFG). */ +#define CFG_MUX1SEL 0x01 /* Select MUX1_OUT input. */ +#define CFG_MUX1EN 0x02 /* Enable MUX1_OUT. */ +#define CFG_MUX2SEL 0x04 /* Select MUX2_OUT input. */ +#define CFG_MUX2EN 0x08 /* Enable MUX2_OUT. */ +#define CFG_PSIZE 0x20 /* Package size (must be 0). */ +#define CFG_ROTEN 0x40 /* Enable rotator. */ + +/* Clock settings (CMD_{WRITE,READ}_CLOCK). */ +#define CLK_RCPWM_INTERNAL 0x00 +#define CLK_RCPWM_EXTERNAL 0x03 +#define CLK_SLOWCLKEN 0x08 /* Enable 32.768kHz clock. */ +#define CLK_SLOWCLKOUT 0x40 /* Enable slow pulse output. */ + +/* The possible addresses corresponding to CONFIG1 and CONFIG2 pin wirings. */ +#define LM8323_I2C_ADDR00 (0x84 >> 1) /* 1000 010x */ +#define LM8323_I2C_ADDR01 (0x86 >> 1) /* 1000 011x */ +#define LM8323_I2C_ADDR10 (0x88 >> 1) /* 1000 100x */ +#define LM8323_I2C_ADDR11 (0x8A >> 1) /* 1000 101x */ + +/* Key event fifo length */ +#define LM8323_FIFO_LEN 15 + +/* Commands for PWM engine; feed in with PWM_WRITE. */ +/* Load ramp counter from duty cycle field (range 0 - 0xff). */ +#define PWM_SET(v) (0x4000 | ((v) & 0xff)) +/* Go to start of script. */ +#define PWM_GOTOSTART 0x0000 +/* + * Stop engine (generates interrupt). If reset is 1, clear the program + * counter, else leave it. + */ +#define PWM_END(reset) (0xc000 | (!!(reset) << 11)) +/* + * Ramp. If s is 1, divide clock by 512, else divide clock by 16. + * Take t clock scales (up to 63) per step, for n steps (up to 126). + * If u is set, ramp up, else ramp down. + */ +#define PWM_RAMP(s, t, n, u) ((!!(s) << 14) | ((t) & 0x3f) << 8 | \ + ((n) & 0x7f) | ((u) ? 0 : 0x80)) +/* + * Loop (i.e. jump back to pos) for a given number of iterations (up to 63). + * If cnt is zero, execute until PWM_END is encountered. + */ +#define PWM_LOOP(cnt, pos) (0xa000 | (((cnt) & 0x3f) << 7) | \ + ((pos) & 0x3f)) +/* + * Wait for trigger. Argument is a mask of channels, shifted by the channel + * number, e.g. 0xa for channels 3 and 1. Note that channels are numbered + * from 1, not 0. + */ +#define PWM_WAIT_TRIG(chans) (0xe000 | (((chans) & 0x7) << 6)) +/* Send trigger. Argument is same as PWM_WAIT_TRIG. */ +#define PWM_SEND_TRIG(chans) (0xe000 | ((chans) & 0x7)) + +struct lm8323_pwm { + int id; + int fade_time; + int brightness; + int desired_brightness; + bool enabled; + bool running; + /* pwm lock */ + struct mutex lock; + struct work_struct work; + struct led_classdev cdev; + struct lm8323_chip *chip; +}; + +struct lm8323_chip { + /* device lock */ + struct mutex lock; + struct i2c_client *client; + struct work_struct work; + struct input_dev *idev; + bool kp_enabled; + bool pm_suspend; + unsigned keys_down; + char phys[32]; + unsigned short keymap[LM8323_KEYMAP_SIZE]; + int size_x; + int size_y; + int debounce_time; + int active_time; + struct lm8323_pwm pwm[LM8323_NUM_PWMS]; +}; + +#define client_to_lm8323(c) container_of(c, struct lm8323_chip, client) +#define dev_to_lm8323(d) container_of(d, struct lm8323_chip, client->dev) +#define work_to_lm8323(w) container_of(w, struct lm8323_chip, work) +#define cdev_to_pwm(c) container_of(c, struct lm8323_pwm, cdev) +#define work_to_pwm(w) container_of(w, struct lm8323_pwm, work) + +#define LM8323_MAX_DATA 8 + +/* + * To write, we just access the chip's address in write mode, and dump the + * command and data out on the bus. The command byte and data are taken as + * sequential u8s out of varargs, to a maximum of LM8323_MAX_DATA. + */ +static int lm8323_write(struct lm8323_chip *lm, int len, ...) +{ + int ret, i; + va_list ap; + u8 data[LM8323_MAX_DATA]; + + va_start(ap, len); + + if (unlikely(len > LM8323_MAX_DATA)) { + dev_err(&lm->client->dev, "tried to send %d bytes\n", len); + va_end(ap); + return 0; + } + + for (i = 0; i < len; i++) + data[i] = va_arg(ap, int); + + va_end(ap); + + /* + * If the host is asleep while we send the data, we can get a NACK + * back while it wakes up, so try again, once. + */ + ret = i2c_master_send(lm->client, data, len); + if (unlikely(ret == -EREMOTEIO)) + ret = i2c_master_send(lm->client, data, len); + if (unlikely(ret != len)) + dev_err(&lm->client->dev, "sent %d bytes of %d total\n", + len, ret); + + return ret; +} + +/* + * To read, we first send the command byte to the chip and end the transaction, + * then access the chip in read mode, at which point it will send the data. + */ +static int lm8323_read(struct lm8323_chip *lm, u8 cmd, u8 *buf, int len) +{ + int ret; + + /* + * If the host is asleep while we send the byte, we can get a NACK + * back while it wakes up, so try again, once. + */ + ret = i2c_master_send(lm->client, &cmd, 1); + if (unlikely(ret == -EREMOTEIO)) + ret = i2c_master_send(lm->client, &cmd, 1); + if (unlikely(ret != 1)) { + dev_err(&lm->client->dev, "sending read cmd 0x%02x failed\n", + cmd); + return 0; + } + + ret = i2c_master_recv(lm->client, buf, len); + if (unlikely(ret != len)) + dev_err(&lm->client->dev, "wanted %d bytes, got %d\n", + len, ret); + + return ret; +} + +/* + * Set the chip active time (idle time before it enters halt). + */ +static void lm8323_set_active_time(struct lm8323_chip *lm, int time) +{ + lm8323_write(lm, 2, LM8323_CMD_SET_ACTIVE, time >> 2); +} + +/* + * The signals are AT-style: the low 7 bits are the keycode, and the top + * bit indicates the state (1 for down, 0 for up). + */ +static inline u8 lm8323_whichkey(u8 event) +{ + return event & 0x7f; +} + +static inline int lm8323_ispress(u8 event) +{ + return (event & 0x80) ? 1 : 0; +} + +static void process_keys(struct lm8323_chip *lm) +{ + u8 event; + u8 key_fifo[LM8323_FIFO_LEN + 1]; + int old_keys_down = lm->keys_down; + int ret; + int i = 0; + + /* + * Read all key events from the FIFO at once. Next READ_FIFO clears the + * FIFO even if we didn't read all events previously. + */ + ret = lm8323_read(lm, LM8323_CMD_READ_FIFO, key_fifo, LM8323_FIFO_LEN); + + if (ret < 0) { + dev_err(&lm->client->dev, "Failed reading fifo \n"); + return; + } + key_fifo[ret] = 0; + + while ((event = key_fifo[i++])) { + u8 key = lm8323_whichkey(event); + int isdown = lm8323_ispress(event); + unsigned short keycode = lm->keymap[key]; + + dev_vdbg(&lm->client->dev, "key 0x%02x %s\n", + key, isdown ? "down" : "up"); + + if (lm->kp_enabled) { + input_event(lm->idev, EV_MSC, MSC_SCAN, key); + input_report_key(lm->idev, keycode, isdown); + input_sync(lm->idev); + } + + if (isdown) + lm->keys_down++; + else + lm->keys_down--; + } + + /* + * Errata: We need to ensure that the chip never enters halt mode + * during a keypress, so set active time to 0. When it's released, + * we can enter halt again, so set the active time back to normal. + */ + if (!old_keys_down && lm->keys_down) + lm8323_set_active_time(lm, 0); + if (old_keys_down && !lm->keys_down) + lm8323_set_active_time(lm, lm->active_time); +} + +static void lm8323_process_error(struct lm8323_chip *lm) +{ + u8 error; + + if (lm8323_read(lm, LM8323_CMD_READ_ERR, &error, 1) == 1) { + if (error & ERR_FIFOOVER) + dev_vdbg(&lm->client->dev, "fifo overflow!\n"); + if (error & ERR_KEYOVR) + dev_vdbg(&lm->client->dev, + "more than two keys pressed\n"); + if (error & ERR_CMDUNK) + dev_vdbg(&lm->client->dev, + "unknown command submitted\n"); + if (error & ERR_BADPAR) + dev_vdbg(&lm->client->dev, "bad command parameter\n"); + } +} + +static void lm8323_reset(struct lm8323_chip *lm) +{ + /* The docs say we must pass 0xAA as the data byte. */ + lm8323_write(lm, 2, LM8323_CMD_RESET, 0xAA); +} + +static int lm8323_configure(struct lm8323_chip *lm) +{ + int keysize = (lm->size_x << 4) | lm->size_y; + int clock = (CLK_SLOWCLKEN | CLK_RCPWM_EXTERNAL); + int debounce = lm->debounce_time >> 2; + int active = lm->active_time >> 2; + + /* + * Active time must be greater than the debounce time: if it's + * a close-run thing, give ourselves a 12ms buffer. + */ + if (debounce >= active) + active = debounce + 3; + + lm8323_write(lm, 2, LM8323_CMD_WRITE_CFG, 0); + lm8323_write(lm, 2, LM8323_CMD_WRITE_CLOCK, clock); + lm8323_write(lm, 2, LM8323_CMD_SET_KEY_SIZE, keysize); + lm8323_set_active_time(lm, lm->active_time); + lm8323_write(lm, 2, LM8323_CMD_SET_DEBOUNCE, debounce); + lm8323_write(lm, 3, LM8323_CMD_WRITE_PORT_STATE, 0xff, 0xff); + lm8323_write(lm, 3, LM8323_CMD_WRITE_PORT_SEL, 0, 0); + + /* + * Not much we can do about errors at this point, so just hope + * for the best. + */ + + return 0; +} + +static void pwm_done(struct lm8323_pwm *pwm) +{ + mutex_lock(&pwm->lock); + pwm->running = false; + if (pwm->desired_brightness != pwm->brightness) + schedule_work(&pwm->work); + mutex_unlock(&pwm->lock); +} + +/* + * Bottom half: handle the interrupt by posting key events, or dealing with + * errors appropriately. + */ +static void lm8323_work(struct work_struct *work) +{ + struct lm8323_chip *lm = work_to_lm8323(work); + u8 ints; + int i; + + mutex_lock(&lm->lock); + + while ((lm8323_read(lm, LM8323_CMD_READ_INT, &ints, 1) == 1) && ints) { + if (likely(ints & INT_KEYPAD)) + process_keys(lm); + if (ints & INT_ROTATOR) { + /* We don't currently support the rotator. */ + dev_vdbg(&lm->client->dev, "rotator fired\n"); + } + if (ints & INT_ERROR) { + dev_vdbg(&lm->client->dev, "error!\n"); + lm8323_process_error(lm); + } + if (ints & INT_NOINIT) { + dev_err(&lm->client->dev, "chip lost config; " + "reinitialising\n"); + lm8323_configure(lm); + } + for (i = 0; i < LM8323_NUM_PWMS; i++) { + if (ints & (1 << (INT_PWM1 + i))) { + dev_vdbg(&lm->client->dev, + "pwm%d engine completed\n", i); + pwm_done(&lm->pwm[i]); + } + } + } + + mutex_unlock(&lm->lock); +} + +/* + * We cannot use I2C in interrupt context, so we just schedule work. + */ +static irqreturn_t lm8323_irq(int irq, void *data) +{ + struct lm8323_chip *lm = data; + + schedule_work(&lm->work); + + return IRQ_HANDLED; +} + +/* + * Read the chip ID. + */ +static int lm8323_read_id(struct lm8323_chip *lm, u8 *buf) +{ + int bytes; + + bytes = lm8323_read(lm, LM8323_CMD_READ_ID, buf, 2); + if (unlikely(bytes != 2)) + return -EIO; + + return 0; +} + +static void lm8323_write_pwm_one(struct lm8323_pwm *pwm, int pos, u16 cmd) +{ + lm8323_write(pwm->chip, 4, LM8323_CMD_PWM_WRITE, (pos << 2) | pwm->id, + (cmd & 0xff00) >> 8, cmd & 0x00ff); +} + +/* + * Write a script into a given PWM engine, concluding with PWM_END. + * If 'kill' is nonzero, the engine will be shut down at the end + * of the script, producing a zero output. Otherwise the engine + * will be kept running at the final PWM level indefinitely. + */ +static void lm8323_write_pwm(struct lm8323_pwm *pwm, int kill, + int len, const u16 *cmds) +{ + int i; + + for (i = 0; i < len; i++) + lm8323_write_pwm_one(pwm, i, cmds[i]); + + lm8323_write_pwm_one(pwm, i++, PWM_END(kill)); + lm8323_write(pwm->chip, 2, LM8323_CMD_START_PWM, pwm->id); + pwm->running = true; +} + +static void lm8323_pwm_work(struct work_struct *work) +{ + struct lm8323_pwm *pwm = work_to_pwm(work); + int div512, perstep, steps, hz, up, kill; + u16 pwm_cmds[3]; + int num_cmds = 0; + + mutex_lock(&pwm->lock); + + /* + * Do nothing if we're already at the requested level, + * or previous setting is not yet complete. In the latter + * case we will be called again when the previous PWM script + * finishes. + */ + if (pwm->running || pwm->desired_brightness == pwm->brightness) + goto out; + + kill = (pwm->desired_brightness == 0); + up = (pwm->desired_brightness > pwm->brightness); + steps = abs(pwm->desired_brightness - pwm->brightness); + + /* + * Convert time (in ms) into a divisor (512 or 16 on a refclk of + * 32768Hz), and number of ticks per step. + */ + if ((pwm->fade_time / steps) > (32768 / 512)) { + div512 = 1; + hz = 32768 / 512; + } else { + div512 = 0; + hz = 32768 / 16; + } + + perstep = (hz * pwm->fade_time) / (steps * 1000); + + if (perstep == 0) + perstep = 1; + else if (perstep > 63) + perstep = 63; + + while (steps) { + int s; + + s = min(126, steps); + pwm_cmds[num_cmds++] = PWM_RAMP(div512, perstep, s, up); + steps -= s; + } + + lm8323_write_pwm(pwm, kill, num_cmds, pwm_cmds); + pwm->brightness = pwm->desired_brightness; + + out: + mutex_unlock(&pwm->lock); +} + +static void lm8323_pwm_set_brightness(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev); + struct lm8323_chip *lm = pwm->chip; + + mutex_lock(&pwm->lock); + pwm->desired_brightness = brightness; + mutex_unlock(&pwm->lock); + + if (in_interrupt()) { + schedule_work(&pwm->work); + } else { + /* + * Schedule PWM work as usual unless we are going into suspend + */ + mutex_lock(&lm->lock); + if (likely(!lm->pm_suspend)) + schedule_work(&pwm->work); + else + lm8323_pwm_work(&pwm->work); + mutex_unlock(&lm->lock); + } +} + +static ssize_t lm8323_pwm_show_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev); + + return sprintf(buf, "%d\n", pwm->fade_time); +} + +static ssize_t lm8323_pwm_store_time(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct led_classdev *led_cdev = dev_get_drvdata(dev); + struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev); + int ret; + unsigned long time; + + ret = strict_strtoul(buf, 10, &time); + /* Numbers only, please. */ + if (ret) + return -EINVAL; + + pwm->fade_time = time; + + return strlen(buf); +} +static DEVICE_ATTR(time, 0644, lm8323_pwm_show_time, lm8323_pwm_store_time); + +static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev, + const char *name) +{ + struct lm8323_pwm *pwm; + + BUG_ON(id > 3); + + pwm = &lm->pwm[id - 1]; + + pwm->id = id; + pwm->fade_time = 0; + pwm->brightness = 0; + pwm->desired_brightness = 0; + pwm->running = false; + pwm->enabled = false; + INIT_WORK(&pwm->work, lm8323_pwm_work); + mutex_init(&pwm->lock); + pwm->chip = lm; + + if (name) { + pwm->cdev.name = name; + pwm->cdev.brightness_set = lm8323_pwm_set_brightness; + if (led_classdev_register(dev, &pwm->cdev) < 0) { + dev_err(dev, "couldn't register PWM %d\n", id); + return -1; + } + if (device_create_file(pwm->cdev.dev, + &dev_attr_time) < 0) { + dev_err(dev, "couldn't register time attribute\n"); + led_classdev_unregister(&pwm->cdev); + return -1; + } + pwm->enabled = true; + } + + return 0; +} + +static struct i2c_driver lm8323_i2c_driver; + +static ssize_t lm8323_show_disable(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lm8323_chip *lm = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", !lm->kp_enabled); +} + +static ssize_t lm8323_set_disable(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct lm8323_chip *lm = dev_get_drvdata(dev); + int ret; + unsigned long i; + + ret = strict_strtoul(buf, 10, &i); + + mutex_lock(&lm->lock); + lm->kp_enabled = !i; + mutex_unlock(&lm->lock); + + return count; +} +static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable); + +static int __devinit lm8323_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct lm8323_platform_data *pdata = client->dev.platform_data; + struct input_dev *idev; + struct lm8323_chip *lm; + int i, err; + unsigned long tmo; + u8 data[2]; + + if (!pdata || !pdata->size_x || !pdata->size_y) { + dev_err(&client->dev, "missing platform_data\n"); + return -EINVAL; + } + + if (pdata->size_x > 8) { + dev_err(&client->dev, "invalid x size %d specified\n", + pdata->size_x); + return -EINVAL; + } + + if (pdata->size_y > 12) { + dev_err(&client->dev, "invalid y size %d specified\n", + pdata->size_y); + return -EINVAL; + } + + lm = kzalloc(sizeof *lm, GFP_KERNEL); + idev = input_allocate_device(); + if (!lm || !idev) { + err = -ENOMEM; + goto fail1; + } + + i2c_set_clientdata(client, lm); + + lm->client = client; + lm->idev = idev; + mutex_init(&lm->lock); + INIT_WORK(&lm->work, lm8323_work); + + lm->size_x = pdata->size_x; + lm->size_y = pdata->size_y; + dev_vdbg(&client->dev, "Keypad size: %d x %d\n", + lm->size_x, lm->size_y); + + lm->debounce_time = pdata->debounce_time; + lm->active_time = pdata->active_time; + + lm8323_reset(lm); + + /* Nothing's set up to service the IRQ yet, so just spin for max. + * 100ms until we can configure. */ + tmo = jiffies + msecs_to_jiffies(100); + while (lm8323_read(lm, LM8323_CMD_READ_INT, data, 1) == 1) { + if (data[0] & INT_NOINIT) + break; + + if (time_after(jiffies, tmo)) { + dev_err(&client->dev, + "timeout waiting for initialisation\n"); + break; + } + + msleep(1); + } + + lm8323_configure(lm); + + /* If a true probe check the device */ + if (lm8323_read_id(lm, data) != 0) { + dev_err(&client->dev, "device not found\n"); + err = -ENODEV; + goto fail1; + } + + for (i = 0; i < LM8323_NUM_PWMS; i++) { + err = init_pwm(lm, i + 1, &client->dev, pdata->pwm_names[i]); + if (err < 0) + goto fail2; + } + + lm->kp_enabled = true; + err = device_create_file(&client->dev, &dev_attr_disable_kp); + if (err < 0) + goto fail2; + + idev->name = pdata->name ? : "LM8323 keypad"; + snprintf(lm->phys, sizeof(lm->phys), + "%s/input-kp", dev_name(&client->dev)); + idev->phys = lm->phys; + + idev->evbit[0] = BIT(EV_KEY) | BIT(EV_MSC); + __set_bit(MSC_SCAN, idev->mscbit); + for (i = 0; i < LM8323_KEYMAP_SIZE; i++) { + __set_bit(pdata->keymap[i], idev->keybit); + lm->keymap[i] = pdata->keymap[i]; + } + __clear_bit(KEY_RESERVED, idev->keybit); + + if (pdata->repeat) + __set_bit(EV_REP, idev->evbit); + + err = input_register_device(idev); + if (err) { + dev_dbg(&client->dev, "error registering input device\n"); + goto fail3; + } + + err = request_irq(client->irq, lm8323_irq, + IRQF_TRIGGER_FALLING | IRQF_DISABLED, + "lm8323", lm); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", client->irq); + goto fail4; + } + + device_init_wakeup(&client->dev, 1); + enable_irq_wake(client->irq); + + return 0; + +fail4: + input_unregister_device(idev); + idev = NULL; +fail3: + device_remove_file(&client->dev, &dev_attr_disable_kp); +fail2: + while (--i >= 0) + if (lm->pwm[i].enabled) + led_classdev_unregister(&lm->pwm[i].cdev); +fail1: + input_free_device(idev); + kfree(lm); + return err; +} + +static int __devexit lm8323_remove(struct i2c_client *client) +{ + struct lm8323_chip *lm = i2c_get_clientdata(client); + int i; + + disable_irq_wake(client->irq); + free_irq(client->irq, lm); + cancel_work_sync(&lm->work); + + input_unregister_device(lm->idev); + + device_remove_file(&lm->client->dev, &dev_attr_disable_kp); + + for (i = 0; i < 3; i++) + if (lm->pwm[i].enabled) + led_classdev_unregister(&lm->pwm[i].cdev); + + kfree(lm); + + return 0; +} + +#ifdef CONFIG_PM +/* + * We don't need to explicitly suspend the chip, as it already switches off + * when there's no activity. + */ +static int lm8323_suspend(struct i2c_client *client, pm_message_t mesg) +{ + struct lm8323_chip *lm = i2c_get_clientdata(client); + int i; + + set_irq_wake(client->irq, 0); + disable_irq(client->irq); + + mutex_lock(&lm->lock); + lm->pm_suspend = true; + mutex_unlock(&lm->lock); + + for (i = 0; i < 3; i++) + if (lm->pwm[i].enabled) + led_classdev_suspend(&lm->pwm[i].cdev); + + return 0; +} + +static int lm8323_resume(struct i2c_client *client) +{ + struct lm8323_chip *lm = i2c_get_clientdata(client); + int i; + + mutex_lock(&lm->lock); + lm->pm_suspend = false; + mutex_unlock(&lm->lock); + + for (i = 0; i < 3; i++) + if (lm->pwm[i].enabled) + led_classdev_resume(&lm->pwm[i].cdev); + + enable_irq(client->irq); + set_irq_wake(client->irq, 1); + + return 0; +} +#else +#define lm8323_suspend NULL +#define lm8323_resume NULL +#endif + +static const struct i2c_device_id lm8323_id[] = { + { "lm8323", 0 }, + { } +}; + +static struct i2c_driver lm8323_i2c_driver = { + .driver = { + .name = "lm8323", + }, + .probe = lm8323_probe, + .remove = __devexit_p(lm8323_remove), + .suspend = lm8323_suspend, + .resume = lm8323_resume, + .id_table = lm8323_id, +}; +MODULE_DEVICE_TABLE(i2c, lm8323_id); + +static int __init lm8323_init(void) +{ + return i2c_add_driver(&lm8323_i2c_driver); +} +module_init(lm8323_init); + +static void __exit lm8323_exit(void) +{ + i2c_del_driver(&lm8323_i2c_driver); +} +module_exit(lm8323_exit); + +MODULE_AUTHOR("Timo O. Karjalainen "); +MODULE_AUTHOR("Daniel Stone"); +MODULE_AUTHOR("Felipe Balbi "); +MODULE_DESCRIPTION("LM8323 keypad driver"); +MODULE_LICENSE("GPL"); + diff --git a/include/linux/i2c/lm8323.h b/include/linux/i2c/lm8323.h new file mode 100644 index 00000000000..478d668bc59 --- /dev/null +++ b/include/linux/i2c/lm8323.h @@ -0,0 +1,46 @@ +/* + * lm8323.h - Configuration for LM8323 keypad driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License only). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_LM8323_H +#define __LINUX_LM8323_H + +#include + +/* + * Largest keycode that the chip can send, plus one, + * so keys can be mapped directly at the index of the + * LM8323 keycode instead of subtracting one. + */ +#define LM8323_KEYMAP_SIZE (0x7f + 1) + +#define LM8323_NUM_PWMS 3 + +struct lm8323_platform_data { + int debounce_time; /* Time to watch for key bouncing, in ms. */ + int active_time; /* Idle time until sleep, in ms. */ + + int size_x; + int size_y; + bool repeat; + const unsigned short *keymap; + + const char *pwm_names[LM8323_NUM_PWMS]; + + const char *name; /* Device name. */ +}; + +#endif /* __LINUX_LM8323_H */ -- cgit v1.2.3-70-g09d2 From ca1b96e00ab5d1b0838965834469a0284c81a517 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:21 +0200 Subject: ide: replace special_t typedef by IDE_SFLAG_* flags Replace: - special_t typedef by IDE_SFLAG_* flags - 'special_t special' ide_drive_t's field by 'u8 special_flags' one There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-disk.c | 4 ++-- drivers/ide/ide-eh.c | 9 ++++----- drivers/ide/ide-io.c | 21 +++++++++++---------- drivers/ide/ide-probe.c | 6 +++--- drivers/ide/ide-taskfile.c | 2 +- drivers/ide/siimage.c | 4 ++-- include/linux/ide.h | 21 ++++++--------------- 7 files changed, 29 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index c2438804d3c..d345f5f23f0 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -428,14 +428,14 @@ static int set_multcount(ide_drive_t *drive, int arg) if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) return -EINVAL; - if (drive->special.b.set_multmode) + if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) return -EBUSY; rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; drive->mult_req = arg; - drive->special.b.set_multmode = 1; + drive->special_flags |= IDE_SFLAG_SET_MULTMODE; error = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 5d5fb961b5c..39d589254d4 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -52,7 +52,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, } if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) - drive->special.b.recalibrate = 1; + drive->special_flags |= IDE_SFLAG_RECALIBRATE; ++rq->errors; @@ -268,9 +268,8 @@ static void ide_disk_pre_reset(ide_drive_t *drive) { int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1; - drive->special.all = 0; - drive->special.b.set_geometry = legacy; - drive->special.b.recalibrate = legacy; + drive->special_flags = + legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0; drive->mult_count = 0; drive->dev_flags &= ~IDE_DFLAG_PARKED; @@ -280,7 +279,7 @@ static void ide_disk_pre_reset(ide_drive_t *drive) drive->mult_req = 0; if (drive->mult_req != drive->mult_count) - drive->special.b.set_multmode = 1; + drive->special_flags |= IDE_SFLAG_SET_MULTMODE; } static void pre_reset(ide_drive_t *drive) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 18557683ed5..644d7b4454a 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -194,14 +194,14 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf) static ide_startstop_t do_special(ide_drive_t *drive) { - special_t *s = &drive->special; struct ide_cmd cmd; #ifdef DEBUG - printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__, s->all); + printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__, + drive->special_flags); #endif if (drive->media != ide_disk) { - s->all = 0; + drive->special_flags = 0; drive->mult_req = 0; return ide_stopped; } @@ -209,14 +209,14 @@ static ide_startstop_t do_special(ide_drive_t *drive) memset(&cmd, 0, sizeof(cmd)); cmd.protocol = ATA_PROT_NODATA; - if (s->b.set_geometry) { - s->b.set_geometry = 0; + if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) { + drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY; ide_tf_set_specify_cmd(drive, &cmd.tf); - } else if (s->b.recalibrate) { - s->b.recalibrate = 0; + } else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) { + drive->special_flags &= ~IDE_SFLAG_RECALIBRATE; ide_tf_set_restore_cmd(drive, &cmd.tf); - } else if (s->b.set_multmode) { - s->b.set_multmode = 0; + } else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) { + drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE; ide_tf_set_setmult_cmd(drive, &cmd.tf); } else BUG(); @@ -339,7 +339,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) printk(KERN_ERR "%s: drive not ready for command\n", drive->name); return startstop; } - if (!drive->special.all) { + + if (drive->special_flags == 0) { struct ide_driver *drv; /* diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index b609a581df4..727a67109ff 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -97,7 +97,7 @@ static void ide_disk_init_mult_count(ide_drive_t *drive) drive->mult_req = id[ATA_ID_MULTSECT] & 0xff; if (drive->mult_req) - drive->special.b.set_multmode = 1; + drive->special_flags |= IDE_SFLAG_SET_MULTMODE; } } @@ -1138,8 +1138,8 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) drive->hwif = hwif; drive->ready_stat = ATA_DRDY; drive->bad_wstat = BAD_W_STAT; - drive->special.b.recalibrate = 1; - drive->special.b.set_geometry = 1; + drive->special_flags = IDE_SFLAG_RECALIBRATE | + IDE_SFLAG_SET_GEOMETRY; drive->name[0] = 'h'; drive->name[1] = 'd'; drive->name[2] = 'a' + j; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index f400eb4d4af..8cab3c26acd 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -166,7 +166,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive) if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { if (custom && tf->command == ATA_CMD_SET_MULTI) { drive->mult_req = drive->mult_count = 0; - drive->special.b.recalibrate = 1; + drive->special_flags |= IDE_SFLAG_RECALIBRATE; (void)ide_dump_status(drive, __func__, stat); return ide_stopped; } else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) { diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index e4973cd1fba..bd82d228608 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -451,8 +451,8 @@ static int sil_sata_reset_poll(ide_drive_t *drive) static void sil_sata_pre_reset(ide_drive_t *drive) { if (drive->media == ide_disk) { - drive->special.b.set_geometry = 0; - drive->special.b.recalibrate = 0; + drive->special_flags &= + ~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE); } } diff --git a/include/linux/ide.h b/include/linux/ide.h index 34c128f0a33..fc61328a4cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -218,21 +218,12 @@ static inline void ide_std_init_ports(hw_regs_t *hw, /* * Special Driver Flags - * - * set_geometry : respecify drive geometry - * recalibrate : seek to cyl 0 - * set_multmode : set multmode count - * reserved : unused */ -typedef union { - unsigned all : 8; - struct { - unsigned set_geometry : 1; - unsigned recalibrate : 1; - unsigned set_multmode : 1; - unsigned reserved : 5; - } b; -} special_t; +enum { + IDE_SFLAG_SET_GEOMETRY = (1 << 0), + IDE_SFLAG_RECALIBRATE = (1 << 1), + IDE_SFLAG_SET_MULTMODE = (1 << 2), +}; /* * Status returned from various ide_ functions @@ -530,7 +521,7 @@ struct ide_drive_s { unsigned long sleep; /* sleep until this time */ unsigned long timeout; /* max time to wait for irq */ - special_t special; /* special action flags */ + u8 special_flags; /* special action flags */ u8 select; /* basic drive/head select reg value */ u8 retry_pio; /* retrying dma capable host in pio */ -- cgit v1.2.3-70-g09d2 From 29e52cf793ded6bece50de50e738596f94f07d9f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:22 +0200 Subject: ide: remove chipset field from hw_regs_t * Convert host drivers that still use hw_regs_t's chipset field to use the one in struct ide_port_info instead. * Move special handling of ide_pci chipset type from ide_hw_configure() to ide_init_port(). * Remove chipset field from hw_regs_t. While at it: - remove stale comment in delkin_cb.c There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 2 +- drivers/ide/au1xxx-ide.c | 2 +- drivers/ide/buddha.c | 3 +-- drivers/ide/cmd640.c | 2 -- drivers/ide/delkin_cb.c | 2 +- drivers/ide/falconide.c | 3 +-- drivers/ide/gayle.c | 3 +-- drivers/ide/icside.c | 3 ++- drivers/ide/ide-4drives.c | 2 +- drivers/ide/ide-cs.c | 2 +- drivers/ide/ide-generic.c | 3 +-- drivers/ide/ide-h8300.c | 2 +- drivers/ide/ide-legacy.c | 1 - drivers/ide/ide-pnp.c | 2 +- drivers/ide/ide-probe.c | 4 +--- drivers/ide/ide_platform.c | 3 +-- drivers/ide/macide.c | 3 +-- drivers/ide/palm_bk3710.c | 2 +- drivers/ide/q40ide.c | 3 +-- drivers/ide/rapide.c | 2 +- drivers/ide/scc_pata.c | 2 +- drivers/ide/setup-pci.c | 1 - drivers/ide/sgiioc4.c | 1 - include/linux/ide.h | 1 - 24 files changed, 20 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 403d0e4265d..8d39cc9bdf9 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -216,6 +216,7 @@ static const struct ide_port_info at91_ide_port_info __initdata = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE | IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS, .pio_mask = ATA_PIO6, + .chipset = ide_generic, }; /* @@ -304,7 +305,6 @@ static int __init at91_ide_probe(struct platform_device *pdev) ide_std_init_ports(&hw, tf_base, ctl_base + 6); hw.irq = board->irq_pin; - hw.chipset = ide_generic; hw.dev = &pdev->dev; host = ide_host_alloc(&at91_ide_port_info, hws); diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 46013644c96..9b31f830e2f 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -499,6 +499,7 @@ static const struct ide_port_info au1xxx_port_info = { #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA .mwdma_mask = ATA_MWDMA2, #endif + .chipset = ide_au1xxx, }; static int au_ide_probe(struct platform_device *dev) @@ -548,7 +549,6 @@ static int au_ide_probe(struct platform_device *dev) auide_setup_ports(&hw, ahwif); hw.irq = ahwif->irq; hw.dev = &dev->dev; - hw.chipset = ide_au1xxx; ret = ide_host_add(&au1xxx_port_info, hws, &host); if (ret) diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c index d028f8864bc..9aa2cd9be31 100644 --- a/drivers/ide/buddha.c +++ b/drivers/ide/buddha.c @@ -139,13 +139,12 @@ static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base, hw->irq = IRQ_AMIGA_PORTS; hw->ack_intr = ack_intr; - - hw->chipset = ide_generic; } static const struct ide_port_info buddha_port_info = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_generic, }; /* diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index 8890276fef7..e862a2503ab 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -762,11 +762,9 @@ static int __init cmd640x_init(void) ide_std_init_ports(&hw[0], 0x1f0, 0x3f6); hw[0].irq = 14; - hw[0].chipset = ide_cmd640; ide_std_init_ports(&hw[1], 0x170, 0x376); hw[1].irq = 15; - hw[1].chipset = ide_cmd640; printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x" "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr); diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c index f153b95619b..a0de834a81c 100644 --- a/drivers/ide/delkin_cb.c +++ b/drivers/ide/delkin_cb.c @@ -68,6 +68,7 @@ static const struct ide_port_info delkin_cb_port_info = { IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, .init_chipset = delkin_cb_init_chipset, + .chipset = ide_pci, }; static int __devinit @@ -97,7 +98,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) ide_std_init_ports(&hw, base + 0x10, base + 0x1e); hw.irq = dev->irq; hw.dev = &dev->dev; - hw.chipset = ide_pci; /* this enables IRQ sharing */ rc = ide_host_add(&delkin_cb_port_info, hws, &host); if (rc) diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index 0e2df6755ec..770cfa67bdc 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -111,6 +111,7 @@ static const struct ide_port_info falconide_port_info = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_generic, }; static void __init falconide_setup_ports(hw_regs_t *hw) @@ -128,8 +129,6 @@ static void __init falconide_setup_ports(hw_regs_t *hw) hw->irq = IRQ_MFP_IDE; hw->ack_intr = NULL; - - hw->chipset = ide_generic; } /* diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c index c7119516c5a..71db2f9c336 100644 --- a/drivers/ide/gayle.c +++ b/drivers/ide/gayle.c @@ -106,14 +106,13 @@ static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base, hw->irq = IRQ_AMIGA_PORTS; hw->ack_intr = ack_intr; - - hw->chipset = ide_generic; } static const struct ide_port_info gayle_port_info = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_generic, }; /* diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 36da913cc55..6352a44ed17 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -398,11 +398,11 @@ static void icside_setup_ports(hw_regs_t *hw, void __iomem *base, hw->irq = ec->irq; hw->dev = &ec->dev; - hw->chipset = ide_acorn; } static const struct ide_port_info icside_v5_port_info = { .host_flags = IDE_HFLAG_NO_DMA, + .chipset = ide_acorn, }; static int __devinit @@ -457,6 +457,7 @@ static const struct ide_port_info icside_v6_port_info __initdata = { .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO, .mwdma_mask = ATA_MWDMA2, .swdma_mask = ATA_SWDMA2, + .chipset = ide_acorn, }; static int __devinit diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c index 78aca75a2c4..617ca7a5ec8 100644 --- a/drivers/ide/ide-4drives.c +++ b/drivers/ide/ide-4drives.c @@ -25,6 +25,7 @@ static const struct ide_port_info ide_4drives_port_info = { .port_ops = &ide_4drives_port_ops, .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA | IDE_HFLAG_4DRIVES, + .chipset = ide_4drives, }; static int __init ide_4drives_init(void) @@ -52,7 +53,6 @@ static int __init ide_4drives_init(void) ide_std_init_ports(&hw, base, ctl); hw.irq = 14; - hw.chipset = ide_4drives; return ide_host_add(&ide_4drives_port_info, hws, NULL); } diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 9e47f3529d5..43d09dcae28 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -155,6 +155,7 @@ static const struct ide_port_info idecs_port_info = { .port_ops = &idecs_port_ops, .host_flags = IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_pci, }; static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, @@ -181,7 +182,6 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, memset(&hw, 0, sizeof(hw)); ide_std_init_ports(&hw, io, ctl); hw.irq = irq; - hw.chipset = ide_pci; hw.dev = &handle->dev; rc = ide_host_add(&idecs_port_info, hws, &host); diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c index 7812ca0be13..0427759d018 100644 --- a/drivers/ide/ide-generic.c +++ b/drivers/ide/ide-generic.c @@ -29,6 +29,7 @@ MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports"); static const struct ide_port_info ide_generic_port_info = { .host_flags = IDE_HFLAG_NO_DMA, + .chipset = ide_generic, }; #ifdef CONFIG_ARM @@ -132,8 +133,6 @@ static int __init ide_generic_init(void) #else hw.irq = legacy_irqs[i]; #endif - hw.chipset = ide_generic; - rc = ide_host_add(&ide_generic_port_info, hws, NULL); if (rc) { release_region(io_addr + 0x206, 1); diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index c06ebdc4a13..40eff6c9759 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -73,12 +73,12 @@ static inline void hw_setup(hw_regs_t *hw) hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i; hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT; hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ; - hw->chipset = ide_generic; } static const struct ide_port_info h8300_port_info = { .tp_ops = &h8300_tp_ops, .host_flags = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA, + .chipset = ide_generic, }; static int __init h8300_ide_init(void) diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c index 8c5dcbf2254..0c5b29c56cb 100644 --- a/drivers/ide/ide-legacy.c +++ b/drivers/ide/ide-legacy.c @@ -33,7 +33,6 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw, ide_std_init_ports(hw, base, ctl); hw->irq = irq; - hw->chipset = d->chipset; hw->config = config; hws[port_no] = hw; diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index 6e80b774e88..47043fda239 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -29,6 +29,7 @@ static struct pnp_device_id idepnp_devices[] = { static const struct ide_port_info ide_pnp_port_info = { .host_flags = IDE_HFLAG_NO_DMA, + .chipset = ide_generic, }; static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) @@ -62,7 +63,6 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) memset(&hw, 0, sizeof(hw)); ide_std_init_ports(&hw, base, ctl); hw.irq = pnp_irq(dev, 0); - hw.chipset = ide_generic; rc = ide_host_add(&ide_pnp_port_info, hws, &host); if (rc) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 727a67109ff..f17ba1932ad 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1048,8 +1048,7 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port, { hwif->channel = port; - if (d->chipset) - hwif->chipset = d->chipset; + hwif->chipset = d->chipset ? d->chipset : ide_pci; if (d->init_iops) d->init_iops(hwif); @@ -1178,7 +1177,6 @@ static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) { memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); hwif->irq = hw->irq; - hwif->chipset = hw->chipset; hwif->dev = hw->dev; hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; hwif->ack_intr = hw->ack_intr; diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c index 051b4ab0f35..813653362a2 100644 --- a/drivers/ide/ide_platform.c +++ b/drivers/ide/ide_platform.c @@ -40,12 +40,11 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw, hw->io_ports.ctl_addr = (unsigned long)ctrl; hw->irq = irq; - - hw->chipset = ide_generic; } static const struct ide_port_info platform_ide_port_info = { .host_flags = IDE_HFLAG_NO_DMA, + .chipset = ide_generic, }; static int __devinit plat_ide_probe(struct platform_device *pdev) diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c index 4b1718e8328..3af9e96da61 100644 --- a/drivers/ide/macide.c +++ b/drivers/ide/macide.c @@ -76,13 +76,12 @@ static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base, hw->irq = irq; hw->ack_intr = ack_intr; - - hw->chipset = ide_generic; } static const struct ide_port_info macide_port_info = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_generic, }; static const char *mac_ide_name[] = diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index 09d813d313f..a455c25f43c 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -306,6 +306,7 @@ static struct ide_port_info __devinitdata palm_bk3710_port_info = { .host_flags = IDE_HFLAG_MMIO, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, + .chipset = ide_palm3710, }; static int __init palm_bk3710_probe(struct platform_device *pdev) @@ -363,7 +364,6 @@ static int __init palm_bk3710_probe(struct platform_device *pdev) (base + IDE_PALM_ATA_PRI_CTL_OFFSET); hw.irq = irq->start; hw.dev = &pdev->dev; - hw.chipset = ide_palm3710; palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 : ATA_UDMA5; diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index c7934667924..7488d4ff3d7 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -70,8 +70,6 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base, hw->irq = irq; hw->ack_intr = ack_intr; - - hw->chipset = ide_generic; } static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, @@ -119,6 +117,7 @@ static const struct ide_port_info q40ide_port_info = { .tp_ops = &q40ide_tp_ops, .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, .irq_flags = IRQF_SHARED, + .chipset = ide_generic, }; /* diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c index d5003ca6980..bd4d7a8a666 100644 --- a/drivers/ide/rapide.c +++ b/drivers/ide/rapide.c @@ -13,6 +13,7 @@ static const struct ide_port_info rapide_port_info = { .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, + .chipset = ide_generic, }; static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base, @@ -49,7 +50,6 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) memset(&hw, 0, sizeof(hw)); rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); - hw.chipset = ide_generic; hw.dev = &ec->dev; ret = ide_host_add(&rapide_port_info, hws, &host); diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 5be41f25204..9e3aef31733 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -567,7 +567,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, hw.io_ports_array[i] = ports->dma + 0x20 + i * 4; hw.irq = dev->irq; hw.dev = &dev->dev; - hw.chipset = ide_pci; rc = ide_host_add(d, hws, &host); if (rc) @@ -823,6 +822,7 @@ static const struct ide_port_info scc_chipset __devinitdata = { .host_flags = IDE_HFLAG_SINGLE, .irq_flags = IRQF_SHARED, .pio_mask = ATA_PIO4, + .chipset = ide_pci, }; /** diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 7a3a12d6e63..82519ddc910 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -344,7 +344,6 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d, memset(hw, 0, sizeof(*hw)); hw->dev = &dev->dev; - hw->chipset = d->chipset ? d->chipset : ide_pci; ide_std_init_ports(hw, base, ctl | 2); return 0; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index e5d2a48a84d..676d41c7add 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -575,7 +575,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) memset(&hw, 0, sizeof(hw)); sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport); hw.irq = dev->irq; - hw.chipset = ide_pci; hw.dev = &dev->dev; /* Initializing chipset IRQ Registers */ diff --git a/include/linux/ide.h b/include/linux/ide.h index fc61328a4cd..9652edbd26a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -186,7 +186,6 @@ typedef struct hw_regs_s { int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ - hwif_chipset_t chipset; struct device *dev, *parent; unsigned long config; } hw_regs_t; -- cgit v1.2.3-70-g09d2 From dca3983059a4481e4ae97bbf0ac4b4c21429e1a5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:24 +0200 Subject: ide: pass number of ports to ide_host_{alloc,add}() (v2) Pass number of ports to ide_host_{alloc,add}() and then update all users accordingly. v2: - drop no longer needed NULL initializers in buddha.c, cmd640.c and gayle.c (noticed by Sergei) There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 5 ++--- drivers/ide/au1xxx-ide.c | 4 ++-- drivers/ide/buddha.c | 4 ++-- drivers/ide/cmd640.c | 5 +++-- drivers/ide/cs5520.c | 4 ++-- drivers/ide/delkin_cb.c | 4 ++-- drivers/ide/falconide.c | 4 ++-- drivers/ide/gayle.c | 4 ++-- drivers/ide/icside.c | 8 ++++---- drivers/ide/ide-4drives.c | 4 ++-- drivers/ide/ide-cs.c | 4 ++-- drivers/ide/ide-generic.c | 4 ++-- drivers/ide/ide-h8300.c | 4 ++-- drivers/ide/ide-legacy.c | 4 ++-- drivers/ide/ide-pnp.c | 4 ++-- drivers/ide/ide-probe.c | 9 +++++---- drivers/ide/ide_platform.c | 4 ++-- drivers/ide/macide.c | 4 ++-- drivers/ide/palm_bk3710.c | 4 ++-- drivers/ide/pmac.c | 4 ++-- drivers/ide/q40ide.c | 4 ++-- drivers/ide/rapide.c | 4 ++-- drivers/ide/scc_pata.c | 4 ++-- drivers/ide/setup-pci.c | 6 +++--- drivers/ide/sgiioc4.c | 4 ++-- drivers/ide/tx4938ide.c | 5 ++--- drivers/ide/tx4939ide.c | 5 ++--- include/linux/ide.h | 5 +++-- 28 files changed, 64 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 8d39cc9bdf9..11fe1ffdff7 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -247,8 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id) static int __init at91_ide_probe(struct platform_device *pdev) { int ret; - hw_regs_t hw; - hw_regs_t *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; unsigned long tf_base = 0, ctl_base = 0; @@ -307,7 +306,7 @@ static int __init at91_ide_probe(struct platform_device *pdev) hw.irq = board->irq_pin; hw.dev = &pdev->dev; - host = ide_host_alloc(&at91_ide_port_info, hws); + host = ide_host_alloc(&at91_ide_port_info, hws, 1); if (!host) { perr("failed to allocate ide host\n"); return -ENOMEM; diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 9b31f830e2f..32f5be68601 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev) struct resource *res; struct ide_host *host; int ret = 0; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA) char *mode = "MWDMA2"; @@ -550,7 +550,7 @@ static int au_ide_probe(struct platform_device *dev) hw.irq = ahwif->irq; hw.dev = &dev->dev; - ret = ide_host_add(&au1xxx_port_info, hws, &host); + ret = ide_host_add(&au1xxx_port_info, hws, 1, &host); if (ret) goto out; diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c index 9aa2cd9be31..0450652cdab 100644 --- a/drivers/ide/buddha.c +++ b/drivers/ide/buddha.c @@ -160,7 +160,7 @@ static int __init buddha_init(void) while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { unsigned long board; - hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS]; if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) { buddha_num_hwifs = BUDDHA_NUM_HWIFS; @@ -224,7 +224,7 @@ fail_base2: hws[i] = &hw[i]; } - ide_host_add(&buddha_port_info, hws, NULL); + ide_host_add(&buddha_port_info, hws, i, NULL); } return 0; diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index e862a2503ab..edb3a7a35c8 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -708,7 +708,7 @@ static int __init cmd640x_init(void) int second_port_cmd640 = 0, rc; const char *bus_type, *port2; u8 b, cfr; - hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[2], *hws[2]; if (cmd640_vlb && probe_for_cmd640_vlb()) { bus_type = "VLB"; @@ -822,7 +822,8 @@ static int __init cmd640x_init(void) cmd640_dump_regs(); #endif - return ide_host_add(&cmd640_port_info, hws, NULL); + return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1, + NULL); } module_param_named(probe_vlb, cmd640_vlb, bool, 0); diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index 87987a7d36c..a9023d7843f 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = { static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id) { const struct ide_port_info *d = &cyrix_chipset; - hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[2], *hws[] = { NULL, NULL }; ide_setup_pci_noise(dev, d); @@ -136,7 +136,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic ide_pci_setup_ports(dev, d, &hw[0], &hws[0]); hw[0].irq = 14; - return ide_host_add(d, hws, NULL); + return ide_host_add(d, hws, 2, NULL); } static const struct pci_device_id cs5520_pci_tbl[] = { diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c index a0de834a81c..d4a76f22ed1 100644 --- a/drivers/ide/delkin_cb.c +++ b/drivers/ide/delkin_cb.c @@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) struct ide_host *host; unsigned long base; int rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; rc = pci_enable_device(dev); if (rc) { @@ -99,7 +99,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) hw.irq = dev->irq; hw.dev = &dev->dev; - rc = ide_host_add(&delkin_cb_port_info, hws, &host); + rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host); if (rc) goto out_disable; diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index 770cfa67bdc..adb5b0cf762 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw) static int __init falconide_init(void) { struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; int rc; if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE)) @@ -153,7 +153,7 @@ static int __init falconide_init(void) falconide_setup_ports(&hw); - host = ide_host_alloc(&falconide_port_info, hws); + host = ide_host_alloc(&falconide_port_info, hws, 1); if (host == NULL) { rc = -ENOMEM; goto err; diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c index 71db2f9c336..253ff34afd8 100644 --- a/drivers/ide/gayle.c +++ b/drivers/ide/gayle.c @@ -125,7 +125,7 @@ static int __init gayle_init(void) unsigned long base, ctrlport, irqport; ide_ack_intr_t *ack_intr; int a4000, i, rc; - hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS]; if (!MACH_IS_AMIGA) return -ENODEV; @@ -170,7 +170,7 @@ found: hws[i] = &hw[i]; } - rc = ide_host_add(&gayle_port_info, hws, NULL); + rc = ide_host_add(&gayle_port_info, hws, i, NULL); if (rc) release_mem_region(res_start, res_n); diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 6352a44ed17..6223b80beb3 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) { void __iomem *base; struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; int ret; base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0); @@ -431,7 +431,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec); - host = ide_host_alloc(&icside_v5_port_info, hws); + host = ide_host_alloc(&icside_v5_port_info, hws, 1); if (host == NULL) return -ENODEV; @@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) struct ide_host *host; unsigned int sel = 0; int ret; - hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL }; + hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] }; struct ide_port_info d = icside_v6_port_info; ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); @@ -507,7 +507,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec); icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec); - host = ide_host_alloc(&d, hws); + host = ide_host_alloc(&d, hws, 2); if (host == NULL) return -ENODEV; diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c index 617ca7a5ec8..189b8bd9957 100644 --- a/drivers/ide/ide-4drives.c +++ b/drivers/ide/ide-4drives.c @@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = { static int __init ide_4drives_init(void) { unsigned long base = 0x1f0, ctl = 0x3f6; - hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw, &hw }; if (probe_4drives == 0) return -ENODEV; @@ -54,7 +54,7 @@ static int __init ide_4drives_init(void) ide_std_init_ports(&hw, base, ctl); hw.irq = 14; - return ide_host_add(&ide_4drives_port_info, hws, NULL); + return ide_host_add(&ide_4drives_port_info, hws, 2, NULL); } module_init(ide_4drives_init); diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 43d09dcae28..63309ad04cb 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, struct ide_host *host; ide_hwif_t *hwif; int i, rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; if (!request_region(io, 8, DRV_NAME)) { printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n", @@ -184,7 +184,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, hw.irq = irq; hw.dev = &handle->dev; - rc = ide_host_add(&idecs_port_info, hws, &host); + rc = ide_host_add(&idecs_port_info, hws, 1, &host); if (rc) goto out_release; diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c index 0427759d018..0d40848540d 100644 --- a/drivers/ide/ide-generic.c +++ b/drivers/ide/ide-generic.c @@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) static int __init ide_generic_init(void) { - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; unsigned long io_addr; int i, rc = 0, primary = 0, secondary = 0; @@ -133,7 +133,7 @@ static int __init ide_generic_init(void) #else hw.irq = legacy_irqs[i]; #endif - rc = ide_host_add(&ide_generic_port_info, hws, NULL); + rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL); if (rc) { release_region(io_addr + 0x206, 1); release_region(io_addr, 8); diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index 40eff6c9759..0b5fabe2806 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = { static int __init h8300_ide_init(void) { - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n"); @@ -96,7 +96,7 @@ static int __init h8300_ide_init(void) hw_setup(&hw); - return ide_host_add(&h8300_port_info, hws, NULL); + return ide_host_add(&h8300_port_info, hws, 1, NULL); out_busy: printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n"); diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c index 0c5b29c56cb..98389e53990 100644 --- a/drivers/ide/ide-legacy.c +++ b/drivers/ide/ide-legacy.c @@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw, int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config) { - hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[2], *hws[] = { NULL, NULL }; memset(&hw, 0, sizeof(hw)); @@ -52,6 +52,6 @@ int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config) (d->host_flags & IDE_HFLAG_SINGLE)) return -ENOENT; - return ide_host_add(d, hws, NULL); + return ide_host_add(d, hws, 2, NULL); } EXPORT_SYMBOL_GPL(ide_legacy_device_add); diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index 47043fda239..6bca0f05ee9 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) struct ide_host *host; unsigned long base, ctl; int rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n"); @@ -64,7 +64,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) ide_std_init_ports(&hw, base, ctl); hw.irq = pnp_irq(dev, 0); - rc = ide_host_add(&ide_pnp_port_info, hws, &host); + rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host); if (rc) goto out; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index f17ba1932ad..6c7451a6e60 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1261,7 +1261,8 @@ out_nomem: return -ENOMEM; } -struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) +struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws, + unsigned int n_ports) { struct ide_host *host; struct device *dev = hws[0] ? hws[0]->dev : NULL; @@ -1272,7 +1273,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) if (host == NULL) return NULL; - for (i = 0; i < MAX_HOST_PORTS; i++) { + for (i = 0; i < n_ports; i++) { ide_hwif_t *hwif; int idx; @@ -1443,12 +1444,12 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, EXPORT_SYMBOL_GPL(ide_host_register); int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws, - struct ide_host **hostp) + unsigned int n_ports, struct ide_host **hostp) { struct ide_host *host; int rc; - host = ide_host_alloc(d, hws); + host = ide_host_alloc(d, hws, n_ports); if (host == NULL) return -ENOMEM; diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c index 813653362a2..47413c2b5f8 100644 --- a/drivers/ide/ide_platform.c +++ b/drivers/ide/ide_platform.c @@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) struct pata_platform_info *pdata; struct ide_host *host; int ret = 0, mmio = 0; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; struct ide_port_info d = platform_ide_port_info; pdata = pdev->dev.platform_data; @@ -98,7 +98,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) if (mmio) d.host_flags |= IDE_HFLAG_MMIO; - ret = ide_host_add(&d, hws, &host); + ret = ide_host_add(&d, hws, 1, &host); if (ret) goto out; diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c index 3af9e96da61..31aa2781860 100644 --- a/drivers/ide/macide.c +++ b/drivers/ide/macide.c @@ -96,7 +96,7 @@ static int __init macide_init(void) ide_ack_intr_t *ack_intr; unsigned long base; int irq; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; if (!MACH_IS_MAC) return -ENODEV; @@ -126,7 +126,7 @@ static int __init macide_init(void) macide_setup_ports(&hw, base, irq, ack_intr); - return ide_host_add(&macide_port_info, hws, NULL); + return ide_host_add(&macide_port_info, hws, 1, NULL); } module_init(macide_init); diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index a455c25f43c..4507a6d801b 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev) void __iomem *base; unsigned long rate, mem_size; int i, rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; clk = clk_get(&pdev->dev, "IDECLK"); if (IS_ERR(clk)) @@ -369,7 +369,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev) ATA_UDMA5; /* Register the IDE interface with Linux */ - rc = ide_host_add(&palm_bk3710_port_info, hws, NULL); + rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL); if (rc) goto out; diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index f76e4e6b408..f4f806476e0 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1029,7 +1029,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) const int *bidp; struct ide_host *host; ide_hwif_t *hwif; - hw_regs_t *hws[] = { hw, NULL, NULL, NULL }; + hw_regs_t *hws[] = { hw }; struct ide_port_info d = pmac_port_info; int rc; @@ -1077,7 +1077,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) /* Make sure we have sane timings */ sanitize_timings(pmif); - host = ide_host_alloc(&d, hws); + host = ide_host_alloc(&d, hws, 1); if (host == NULL) return -ENOMEM; hwif = host->ports[0]; diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index 7488d4ff3d7..e46229fe5ea 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={ static int __init q40ide_init(void) { int i; - hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL }; if (!MACH_IS_Q40) return -ENODEV; @@ -162,7 +162,7 @@ static int __init q40ide_init(void) hws[i] = &hw[i]; } - return ide_host_add(&q40ide_port_info, hws, NULL); + return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL); } module_init(q40ide_init); diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c index bd4d7a8a666..c4da3dd39f5 100644 --- a/drivers/ide/rapide.c +++ b/drivers/ide/rapide.c @@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) void __iomem *base; struct ide_host *host; int ret; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; ret = ecard_request_resources(ec); if (ret) @@ -52,7 +52,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); hw.dev = &ec->dev; - ret = ide_host_add(&rapide_port_info, hws, &host); + ret = ide_host_add(&rapide_port_info, hws, 1, &host); if (ret) goto release; diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 9e3aef31733..9415f8c8a41 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, { struct scc_ports *ports = pci_get_drvdata(dev); struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; int i, rc; memset(&hw, 0, sizeof(hw)); @@ -568,7 +568,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, hw.irq = dev->irq; hw.dev = &dev->dev; - rc = ide_host_add(d, hws, &host); + rc = ide_host_add(d, hws, 1, &host); if (rc) return rc; diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 82519ddc910..d78f4c99451 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d, void *priv) { struct ide_host *host; - hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL }; + hw_regs_t hw[2], *hws[] = { NULL, NULL }; int ret; ret = ide_setup_pci_controller(dev, d, 1); @@ -547,7 +547,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d, ide_pci_setup_ports(dev, d, &hw[0], &hws[0]); - host = ide_host_alloc(d, hws); + host = ide_host_alloc(d, hws, 2); if (host == NULL) { ret = -ENOMEM; goto out; @@ -596,7 +596,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]); } - host = ide_host_alloc(d, hws); + host = ide_host_alloc(d, hws, 4); if (host == NULL) { ret = -ENOMEM; goto out; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index 676d41c7add..3f8ee357ffb 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) unsigned long cmd_base, irqport; unsigned long bar0, cmd_phys_base, ctl; void __iomem *virt_base; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; int rc; /* Get the CmdBlk and CtrlBlk Base Registers */ @@ -580,7 +580,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) /* Initializing chipset IRQ Registers */ writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4)); - rc = ide_host_add(&sgiioc4_port_info, hws, NULL); + rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL); if (!rc) return 0; diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index e33d764e294..16adc18499f 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -130,8 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = { static int __init tx4938ide_probe(struct platform_device *pdev) { - hw_regs_t hw; - hw_regs_t *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; struct tx4938ide_platform_info *pdata = pdev->dev.platform_data; @@ -183,7 +182,7 @@ static int __init tx4938ide_probe(struct platform_device *pdev) tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0); else d.port_ops = NULL; - ret = ide_host_add(&d, hws, &host); + ret = ide_host_add(&d, hws, 1, &host); if (!ret) platform_set_drvdata(pdev, host); return ret; diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 564422d2397..fa57920d003 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -537,8 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = { static int __init tx4939ide_probe(struct platform_device *pdev) { - hw_regs_t hw; - hw_regs_t *hws[] = { &hw, NULL, NULL, NULL }; + hw_regs_t hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; int irq, ret; @@ -581,7 +580,7 @@ static int __init tx4939ide_probe(struct platform_device *pdev) hw.dev = &pdev->dev; pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq); - host = ide_host_alloc(&tx4939ide_port_info, hws); + host = ide_host_alloc(&tx4939ide_port_info, hws, 1); if (!host) return -ENOMEM; /* use extra_base for base address of the all registers */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 9652edbd26a..a3cd568553d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1456,11 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); int ide_sysfs_register_port(ide_hwif_t *); -struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **); +struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **, + unsigned int); void ide_host_free(struct ide_host *); int ide_host_register(struct ide_host *, const struct ide_port_info *, hw_regs_t **); -int ide_host_add(const struct ide_port_info *, hw_regs_t **, +int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int, struct ide_host **); void ide_host_remove(struct ide_host *); int ide_legacy_device_add(const struct ide_port_info *, unsigned long); -- cgit v1.2.3-70-g09d2 From 9f36d31437922354d104a2db407f397e79e4027e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 17 May 2009 19:12:25 +0200 Subject: ide: remove hw_regs_t typedef Remove hw_regs_t typedef and rename struct hw_regs_s to struct ide_hw. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 2 +- drivers/ide/au1xxx-ide.c | 4 ++-- drivers/ide/buddha.c | 4 ++-- drivers/ide/cmd640.c | 2 +- drivers/ide/cs5520.c | 2 +- drivers/ide/delkin_cb.c | 2 +- drivers/ide/falconide.c | 4 ++-- drivers/ide/gayle.c | 4 ++-- drivers/ide/icside.c | 6 +++--- drivers/ide/ide-4drives.c | 2 +- drivers/ide/ide-cs.c | 2 +- drivers/ide/ide-generic.c | 2 +- drivers/ide/ide-h8300.c | 6 +++--- drivers/ide/ide-legacy.c | 4 ++-- drivers/ide/ide-pnp.c | 2 +- drivers/ide/ide-probe.c | 10 +++++----- drivers/ide/ide_platform.c | 4 ++-- drivers/ide/macide.c | 4 ++-- drivers/ide/palm_bk3710.c | 2 +- drivers/ide/pmac.c | 11 ++++++----- drivers/ide/q40ide.c | 6 +++--- drivers/ide/rapide.c | 4 ++-- drivers/ide/scc_pata.c | 2 +- drivers/ide/setup-pci.c | 16 ++++++++-------- drivers/ide/sgiioc4.c | 4 ++-- drivers/ide/tx4938ide.c | 2 +- drivers/ide/tx4939ide.c | 2 +- include/linux/ide.h | 14 +++++++------- 28 files changed, 65 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 11fe1ffdff7..fc0949a8cfd 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -247,7 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id) static int __init at91_ide_probe(struct platform_device *pdev) { int ret; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; unsigned long tf_base = 0, ctl_base = 0; diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 32f5be68601..58121bd6c11 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -449,7 +449,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) } #endif -static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif) +static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif) { int i; unsigned long *ata_regs = hw->io_ports_array; @@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev) struct resource *res; struct ide_host *host; int ret = 0; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA) char *mode = "MWDMA2"; diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c index 0450652cdab..e3c6a591330 100644 --- a/drivers/ide/buddha.c +++ b/drivers/ide/buddha.c @@ -121,7 +121,7 @@ static int xsurf_ack_intr(ide_hwif_t *hwif) return 1; } -static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base, +static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base, unsigned long ctl, unsigned long irq_port, ide_ack_intr_t *ack_intr) { @@ -160,7 +160,7 @@ static int __init buddha_init(void) while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { unsigned long board; - hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS]; + struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS]; if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) { buddha_num_hwifs = BUDDHA_NUM_HWIFS; diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index edb3a7a35c8..1683ed5c732 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -708,7 +708,7 @@ static int __init cmd640x_init(void) int second_port_cmd640 = 0, rc; const char *bus_type, *port2; u8 b, cfr; - hw_regs_t hw[2], *hws[2]; + struct ide_hw hw[2], *hws[2]; if (cmd640_vlb && probe_for_cmd640_vlb()) { bus_type = "VLB"; diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index a9023d7843f..bd066bb9d61 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = { static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id) { const struct ide_port_info *d = &cyrix_chipset; - hw_regs_t hw[2], *hws[] = { NULL, NULL }; + struct ide_hw hw[2], *hws[] = { NULL, NULL }; ide_setup_pci_noise(dev, d); diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c index d4a76f22ed1..1e10eba62ce 100644 --- a/drivers/ide/delkin_cb.c +++ b/drivers/ide/delkin_cb.c @@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) struct ide_host *host; unsigned long base; int rc; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; rc = pci_enable_device(dev); if (rc) { diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index adb5b0cf762..22fa27389c3 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -114,7 +114,7 @@ static const struct ide_port_info falconide_port_info = { .chipset = ide_generic, }; -static void __init falconide_setup_ports(hw_regs_t *hw) +static void __init falconide_setup_ports(struct ide_hw *hw) { int i; @@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw) static int __init falconide_init(void) { struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; int rc; if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE)) diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c index 253ff34afd8..4451a6a5dfe 100644 --- a/drivers/ide/gayle.c +++ b/drivers/ide/gayle.c @@ -88,7 +88,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif) return 1; } -static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base, +static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base, unsigned long ctl, unsigned long irq_port, ide_ack_intr_t *ack_intr) { @@ -125,7 +125,7 @@ static int __init gayle_init(void) unsigned long base, ctrlport, irqport; ide_ack_intr_t *ack_intr; int a4000, i, rc; - hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS]; + struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS]; if (!MACH_IS_AMIGA) return -ENODEV; diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 6223b80beb3..c5269fa1f73 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -381,7 +381,7 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d) return -EOPNOTSUPP; } -static void icside_setup_ports(hw_regs_t *hw, void __iomem *base, +static void icside_setup_ports(struct ide_hw *hw, void __iomem *base, struct cardinfo *info, struct expansion_card *ec) { unsigned long port = (unsigned long)base + info->dataoffset; @@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) { void __iomem *base; struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; int ret; base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0); @@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) struct ide_host *host; unsigned int sel = 0; int ret; - hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] }; + struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] }; struct ide_port_info d = icside_v6_port_info; ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c index 189b8bd9957..979d342c338 100644 --- a/drivers/ide/ide-4drives.c +++ b/drivers/ide/ide-4drives.c @@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = { static int __init ide_4drives_init(void) { unsigned long base = 0x1f0, ctl = 0x3f6; - hw_regs_t hw, *hws[] = { &hw, &hw }; + struct ide_hw hw, *hws[] = { &hw, &hw }; if (probe_4drives == 0) return -ENODEV; diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 63309ad04cb..527908ff298 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, struct ide_host *host; ide_hwif_t *hwif; int i, rc; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; if (!request_region(io, 8, DRV_NAME)) { printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n", diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c index 0d40848540d..54d7c4685d2 100644 --- a/drivers/ide/ide-generic.c +++ b/drivers/ide/ide-generic.c @@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) static int __init ide_generic_init(void) { - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; unsigned long io_addr; int i, rc = 0, primary = 0, secondary = 0; diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index 0b5fabe2806..520f42c5445 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -64,11 +64,11 @@ static const struct ide_tp_ops h8300_tp_ops = { #define H8300_IDE_GAP (2) -static inline void hw_setup(hw_regs_t *hw) +static inline void hw_setup(struct ide_hw *hw) { int i; - memset(hw, 0, sizeof(hw_regs_t)); + memset(hw, 0, sizeof(*hw)); for (i = 0; i <= 7; i++) hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i; hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT; @@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = { static int __init h8300_ide_init(void) { - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n"); diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c index 98389e53990..b9654a7bb7b 100644 --- a/drivers/ide/ide-legacy.c +++ b/drivers/ide/ide-legacy.c @@ -1,7 +1,7 @@ #include #include -static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw, +static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw, u8 port_no, const struct ide_port_info *d, unsigned long config) { @@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw, int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config) { - hw_regs_t hw[2], *hws[] = { NULL, NULL }; + struct ide_hw hw[2], *hws[] = { NULL, NULL }; memset(&hw, 0, sizeof(hw)); diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index 6bca0f05ee9..017b1df3b80 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) struct ide_host *host; unsigned long base, ctl; int rc; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n"); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 6c7451a6e60..29363829a3f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1173,7 +1173,7 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) ide_port_init_devices_data(hwif); } -static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) +static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw) { memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); hwif->irq = hw->irq; @@ -1261,8 +1261,8 @@ out_nomem: return -ENOMEM; } -struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws, - unsigned int n_ports) +struct ide_host *ide_host_alloc(const struct ide_port_info *d, + struct ide_hw **hws, unsigned int n_ports) { struct ide_host *host; struct device *dev = hws[0] ? hws[0]->dev : NULL; @@ -1349,7 +1349,7 @@ static void ide_disable_port(ide_hwif_t *hwif) } int ide_host_register(struct ide_host *host, const struct ide_port_info *d, - hw_regs_t **hws) + struct ide_hw **hws) { ide_hwif_t *hwif, *mate = NULL; int i, j = 0; @@ -1443,7 +1443,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, } EXPORT_SYMBOL_GPL(ide_host_register); -int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws, +int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws, unsigned int n_ports, struct ide_host **hostp) { struct ide_host *host; diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c index 47413c2b5f8..ee9b55ecc62 100644 --- a/drivers/ide/ide_platform.c +++ b/drivers/ide/ide_platform.c @@ -21,7 +21,7 @@ #include #include -static void __devinit plat_ide_setup_ports(hw_regs_t *hw, +static void __devinit plat_ide_setup_ports(struct ide_hw *hw, void __iomem *base, void __iomem *ctrl, struct pata_platform_info *pdata, @@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) struct pata_platform_info *pdata; struct ide_host *host; int ret = 0, mmio = 0; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; struct ide_port_info d = platform_ide_port_info; pdata = pdev->dev.platform_data; diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c index 31aa2781860..1447c8c9056 100644 --- a/drivers/ide/macide.c +++ b/drivers/ide/macide.c @@ -62,7 +62,7 @@ int macide_ack_intr(ide_hwif_t* hwif) return 0; } -static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base, +static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base, int irq, ide_ack_intr_t *ack_intr) { int i; @@ -96,7 +96,7 @@ static int __init macide_init(void) ide_ack_intr_t *ack_intr; unsigned long base; int irq; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; if (!MACH_IS_MAC) return -ENODEV; diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index 4507a6d801b..3c1dc015215 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev) void __iomem *base; unsigned long rate, mem_size; int i, rc; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; clk = clk_get(&pdev->dev, "IDECLK"); if (IS_ERR(clk)) diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index f4f806476e0..97642a7a79c 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1023,13 +1023,14 @@ static const struct ide_port_info pmac_port_info = { * Setup, register & probe an IDE channel driven by this driver, this is * called by one of the 2 probe functions (macio or PCI). */ -static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) +static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, + struct ide_hw *hw) { struct device_node *np = pmif->node; const int *bidp; struct ide_host *host; ide_hwif_t *hwif; - hw_regs_t *hws[] = { hw }; + struct ide_hw *hws[] = { hw }; struct ide_port_info d = pmac_port_info; int rc; @@ -1124,7 +1125,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) return 0; } -static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base) +static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base) { int i; @@ -1144,7 +1145,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) unsigned long regbase; pmac_ide_hwif_t *pmif; int irq, rc; - hw_regs_t hw; + struct ide_hw hw; pmif = kzalloc(sizeof(*pmif), GFP_KERNEL); if (pmif == NULL) @@ -1268,7 +1269,7 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id) void __iomem *base; unsigned long rbase, rlen; int rc; - hw_regs_t hw; + struct ide_hw hw; np = pci_device_to_OF_node(pdev); if (np == NULL) { diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index e46229fe5ea..ab49a97023d 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -51,11 +51,11 @@ static int q40ide_default_irq(unsigned long base) /* * Addresses are pretranslated for Q40 ISA access. */ -static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base, +static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base, ide_ack_intr_t *ack_intr, int irq) { - memset(hw, 0, sizeof(hw_regs_t)); + memset(hw, 0, sizeof(*hw)); /* BIG FAT WARNING: assumption: only DATA port is ever used in 16 bit mode */ hw->io_ports.data_addr = Q40_ISA_IO_W(base); @@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={ static int __init q40ide_init(void) { int i; - hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL }; + struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL }; if (!MACH_IS_Q40) return -ENODEV; diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c index c4da3dd39f5..00f54248f41 100644 --- a/drivers/ide/rapide.c +++ b/drivers/ide/rapide.c @@ -16,7 +16,7 @@ static const struct ide_port_info rapide_port_info = { .chipset = ide_generic, }; -static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base, +static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base, void __iomem *ctrl, unsigned int sz, int irq) { unsigned long port = (unsigned long)base; @@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) void __iomem *base; struct ide_host *host; int ret; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; ret = ecard_request_resources(ec); if (ret) diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 9415f8c8a41..1104bb301eb 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, { struct scc_ports *ports = pci_get_drvdata(dev); struct ide_host *host; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; int i, rc; memset(&hw, 0, sizeof(hw)); diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index d78f4c99451..5314edffc30 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -301,11 +301,11 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info * } /** - * ide_hw_configure - configure a hw_regs_t instance + * ide_hw_configure - configure a struct ide_hw instance * @dev: PCI device holding interface * @d: IDE port info * @port: port number - * @hw: hw_regs_t instance corresponding to this port + * @hw: struct ide_hw instance corresponding to this port * * Perform the initial set up for the hardware interface structure. This * is done per interface port rather than per PCI device. There may be @@ -315,7 +315,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info * */ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d, - unsigned int port, hw_regs_t *hw) + unsigned int port, struct ide_hw *hw) { unsigned long ctl = 0, base = 0; @@ -445,8 +445,8 @@ out: * ide_pci_setup_ports - configure ports/devices on PCI IDE * @dev: PCI device * @d: IDE port info - * @hw: hw_regs_t instances corresponding to this PCI IDE device - * @hws: hw_regs_t pointers table to update + * @hw: struct ide_hw instances corresponding to this PCI IDE device + * @hws: struct ide_hw pointers table to update * * Scan the interfaces attached to this device and do any * necessary per port setup. Attach the devices and ask the @@ -458,7 +458,7 @@ out: */ void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d, - hw_regs_t *hw, hw_regs_t **hws) + struct ide_hw *hw, struct ide_hw **hws) { int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port; u8 tmp; @@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d, void *priv) { struct ide_host *host; - hw_regs_t hw[2], *hws[] = { NULL, NULL }; + struct ide_hw hw[2], *hws[] = { NULL, NULL }; int ret; ret = ide_setup_pci_controller(dev, d, 1); @@ -586,7 +586,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, struct pci_dev *pdev[] = { dev1, dev2 }; struct ide_host *host; int ret, i; - hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL }; + struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL }; for (i = 0; i < 2; i++) { ret = ide_setup_pci_controller(pdev[i], d, !i); diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index 3f8ee357ffb..5f37f168f94 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -91,7 +91,7 @@ typedef struct { static void -sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port, +sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port, unsigned long ctrl_port, unsigned long irq_port) { unsigned long reg = data_port; @@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) unsigned long cmd_base, irqport; unsigned long bar0, cmd_phys_base, ctl; void __iomem *virt_base; - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; int rc; /* Get the CmdBlk and CtrlBlk Base Registers */ diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index 16adc18499f..ea89fddeed9 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -130,7 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = { static int __init tx4938ide_probe(struct platform_device *pdev) { - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; struct tx4938ide_platform_info *pdata = pdev->dev.platform_data; diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index fa57920d003..9f73fd43d1f 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -537,7 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = { static int __init tx4939ide_probe(struct platform_device *pdev) { - hw_regs_t hw, *hws[] = { &hw }; + struct ide_hw hw, *hws[] = { &hw }; struct ide_host *host; struct resource *res; int irq, ret; diff --git a/include/linux/ide.h b/include/linux/ide.h index a3cd568553d..b1b903a0dac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -178,7 +178,7 @@ typedef u8 hwif_chipset_t; /* * Structure to hold all information about the location of this port */ -typedef struct hw_regs_s { +struct ide_hw { union { struct ide_io_ports io_ports; unsigned long io_ports_array[IDE_NR_PORTS]; @@ -188,9 +188,9 @@ typedef struct hw_regs_s { ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ struct device *dev, *parent; unsigned long config; -} hw_regs_t; +}; -static inline void ide_std_init_ports(hw_regs_t *hw, +static inline void ide_std_init_ports(struct ide_hw *hw, unsigned long io_addr, unsigned long ctl_addr) { @@ -1212,7 +1212,7 @@ static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev) } void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, - hw_regs_t *, hw_regs_t **); + struct ide_hw *, struct ide_hw **); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); #ifdef CONFIG_BLK_DEV_IDEDMA_PCI @@ -1456,12 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); int ide_sysfs_register_port(ide_hwif_t *); -struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **, +struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **, unsigned int); void ide_host_free(struct ide_host *); int ide_host_register(struct ide_host *, const struct ide_port_info *, - hw_regs_t **); -int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int, + struct ide_hw **); +int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int, struct ide_host **); void ide_host_remove(struct ide_host *); int ide_legacy_device_add(const struct ide_port_info *, unsigned long); -- cgit v1.2.3-70-g09d2 From 8b3521eeb7598c3b10c7e14361a7974464527702 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 11 May 2009 05:52:49 +0000 Subject: ipv4: remove an unused parameter from configure method of fib_rules_ops. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 - net/core/fib_rules.c | 2 +- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c2bb5cae651..b9b63395d00 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -48,7 +48,6 @@ struct fib_rules_ops struct flowi *, int); int (*configure)(struct fib_rule *, struct sk_buff *, - struct nlmsghdr *, struct fib_rule_hdr *, struct nlattr **); int (*compare)(struct fib_rule *, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98691e1466b..17d9f497b79 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -299,7 +299,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; - err = ops->configure(rule, skb, nlh, frh, tb); + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 14fbca55e90..a2690b12e03 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -115,7 +115,7 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) } static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct fib_rule_hdr *frh, struct nlattr **tb) { int err = -EINVAL; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6080d712082..38904be4102 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -134,7 +134,7 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct fib_rule_hdr *frh, struct nlattr **tb) { struct net *net = sock_net(skb->sk); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f5de3f9dc69..e1a36dbb5a2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -151,7 +151,7 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = { }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct fib_rule_hdr *frh, struct nlattr **tb) { int err = -EINVAL; -- cgit v1.2.3-70-g09d2 From 9d21493b4beb8f918ba248032fefa393074a5e2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 17 May 2009 20:55:16 -0700 Subject: net: tx scalability works : trans_start struct net_device trans_start field is a hot spot on SMP and high performance devices, particularly multi queues ones, because every transmitter dirties it. Is main use is tx watchdog and bonding alive checks. But as most devices dont use NETIF_F_LLTX, we have to lock a netdev_queue before calling their ndo_start_xmit(). So it makes sense to move trans_start from net_device to netdev_queue. Its update will occur on a already present (and in exclusive state) cache line, for free. We can do this transition smoothly. An old driver continue to update dev->trans_start, while an updated one updates txq->trans_start. Further patches could also put tx_bytes/tx_packets counters in netdev_queue to avoid dirtying dev->stats (vlan device comes to mind) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 8 ++++---- include/linux/netdevice.h | 11 +++++++++++ net/sched/sch_generic.c | 40 +++++++++++++++++++++++++++++++--------- 3 files changed, 46 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 815191dd03c..30b9ea6d62b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2795,7 +2795,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) */ bond_for_each_slave(bond, slave, i) { if (slave->link != BOND_LINK_UP) { - if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) && + if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) && time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { slave->link = BOND_LINK_UP; @@ -2827,7 +2827,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * when the source ip is 0, so don't take the link down * if we don't know our ip yet */ - if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || + if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) || (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { slave->link = BOND_LINK_DOWN; @@ -2938,7 +2938,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) * the bond has an IP address) */ if ((slave->state == BOND_STATE_ACTIVE) && - (time_after_eq(jiffies, slave->dev->trans_start + + (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2 * delta_in_ticks) || (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2 * delta_in_ticks)))) { @@ -2982,7 +2982,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) write_lock_bh(&bond->curr_slave_lock); if (!bond->curr_active_slave && - time_before_eq(jiffies, slave->dev->trans_start + + time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks)) { slave->link = BOND_LINK_UP; bond_change_active_slave(bond, slave); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2af89b662ca..cd547d04a8c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -470,6 +470,10 @@ struct netdev_queue { */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; + /* + * please use this field instead of dev->trans_start + */ + unsigned long trans_start; } ____cacheline_aligned_in_smp; @@ -819,6 +823,11 @@ struct net_device * One part is mostly used on xmit path (device) */ /* These may be needed for future network-power-down code. */ + + /* + * trans_start here is expensive for high speed devices on SMP, + * please use netdev_queue->trans_start instead. + */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ int watchdog_timeo; /* used by dev_watchdog() */ @@ -1541,6 +1550,8 @@ static inline int netif_carrier_ok(const struct net_device *dev) return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); } +extern unsigned long dev_trans_start(struct net_device *dev); + extern void __netdev_watchdog_up(struct net_device *dev); extern void netif_carrier_on(struct net_device *dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5f5efe4e607..27d03816ec3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -196,6 +196,21 @@ void __qdisc_run(struct Qdisc *q) clear_bit(__QDISC_STATE_RUNNING, &q->state); } +unsigned long dev_trans_start(struct net_device *dev) +{ + unsigned long val, res = dev->trans_start; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + val = netdev_get_tx_queue(dev, i)->trans_start; + if (val && time_after(val, res)) + res = val; + } + dev->trans_start = res; + return res; +} +EXPORT_SYMBOL(dev_trans_start); + static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; @@ -205,25 +220,30 @@ static void dev_watchdog(unsigned long arg) if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - int some_queue_stopped = 0; + int some_queue_timedout = 0; unsigned int i; + unsigned long trans_start; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); - if (netif_tx_queue_stopped(txq)) { - some_queue_stopped = 1; + /* + * old device drivers set dev->trans_start + */ + trans_start = txq->trans_start ? : dev->trans_start; + if (netif_tx_queue_stopped(txq) && + time_after(jiffies, (trans_start + + dev->watchdog_timeo))) { + some_queue_timedout = 1; break; } } - if (some_queue_stopped && - time_after(jiffies, (dev->trans_start + - dev->watchdog_timeo))) { + if (some_queue_timedout) { char drivername[64]; - WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", - dev->name, netdev_drivername(dev, drivername, 64)); + WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", + dev->name, netdev_drivername(dev, drivername, 64), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, @@ -602,8 +622,10 @@ static void transition_one_qdisc(struct net_device *dev, clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); rcu_assign_pointer(dev_queue->qdisc, new_qdisc); - if (need_watchdog_p && new_qdisc != &noqueue_qdisc) + if (need_watchdog_p && new_qdisc != &noqueue_qdisc) { + dev_queue->trans_start = 0; *need_watchdog_p = 1; + } } void dev_activate(struct net_device *dev) -- cgit v1.2.3-70-g09d2 From f2a3e626202a87734a47153935ec9d15c7fcf761 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 15 May 2009 06:04:12 +0000 Subject: mdio: Add 10GBASE-T SNR register definition These do not have an in-kernel user but may be useful to user-space. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mdio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 26b4eb3bbee..825f1e2e2ec 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -46,6 +46,8 @@ /* Media-dependent registers. */ #define MDIO_PMA_10GBT_TXPWR 131 /* 10GBASE-T TX power control */ +#define MDIO_PMA_10GBT_SNR 133 /* 10GBASE-T SNR margin, lane A. + * Lanes B-D are numbered 134-136. */ #define MDIO_PMA_10GBR_FECABLE 170 /* 10GBASE-R FEC ability */ #define MDIO_PCS_10GBX_STAT1 24 /* 10GBASE-X PCS status 1 */ #define MDIO_PCS_10GBRT_STAT1 32 /* 10GBASE-R/-T PCS status 1 */ @@ -188,6 +190,11 @@ /* PMA 10GBASE-T TX power register. */ #define MDIO_PMA_10GBT_TXPWR_SHORT 0x0001 /* Short-reach mode */ +/* PMA 10GBASE-T SNR registers. */ +/* Value is SNR margin in dB, clamped to range [-127, 127], plus 0x8000. */ +#define MDIO_PMA_10GBT_SNR_BIAS 0x8000 +#define MDIO_PMA_10GBT_SNR_MAX 127 + /* PMA 10GBASE-R FEC ability register. */ #define MDIO_PMA_10GBR_FECABLE_ABLE 0x0001 /* FEC ability */ #define MDIO_PMA_10GBR_FECABLE_ERRABLE 0x0002 /* FEC error indic. ability */ -- cgit v1.2.3-70-g09d2 From e0b221bf4e07edf2fda645e457dc3c35c2f2f3a9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 15 May 2009 06:05:49 +0000 Subject: mdio: Add XENPAK LASI register definitions These registers were originally defined for XENPAK modules, but are also implemented by many other 10G PHYs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mdio.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 825f1e2e2ec..56851646529 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -55,6 +55,14 @@ #define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */ #define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */ +/* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */ +#define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ +#define MDIO_PMA_LASI_TXCTRL 0x9001 /* TX_ALARM control */ +#define MDIO_PMA_LASI_CTRL 0x9002 /* LASI control */ +#define MDIO_PMA_LASI_RXSTAT 0x9003 /* RX_ALARM status */ +#define MDIO_PMA_LASI_TXSTAT 0x9004 /* TX_ALARM status */ +#define MDIO_PMA_LASI_STAT 0x9005 /* LASI status */ + /* Control register 1. */ /* Enable extended speed selection */ #define MDIO_CTRL1_SPEEDSELEXT (BMCR_SPEED1000 | BMCR_SPEED100) @@ -218,6 +226,26 @@ #define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */ #define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */ +/* LASI RX_ALARM control/status registers. */ +#define MDIO_PMA_LASI_RX_PHYXSLFLT 0x0001 /* PHY XS RX local fault */ +#define MDIO_PMA_LASI_RX_PCSLFLT 0x0008 /* PCS RX local fault */ +#define MDIO_PMA_LASI_RX_PMALFLT 0x0010 /* PMA/PMD RX local fault */ +#define MDIO_PMA_LASI_RX_OPTICPOWERFLT 0x0020 /* RX optical power fault */ +#define MDIO_PMA_LASI_RX_WISLFLT 0x0200 /* WIS local fault */ + +/* LASI TX_ALARM control/status registers. */ +#define MDIO_PMA_LASI_TX_PHYXSLFLT 0x0001 /* PHY XS TX local fault */ +#define MDIO_PMA_LASI_TX_PCSLFLT 0x0008 /* PCS TX local fault */ +#define MDIO_PMA_LASI_TX_PMALFLT 0x0010 /* PMA/PMD TX local fault */ +#define MDIO_PMA_LASI_TX_LASERPOWERFLT 0x0080 /* Laser output power fault */ +#define MDIO_PMA_LASI_TX_LASERTEMPFLT 0x0100 /* Laser temperature fault */ +#define MDIO_PMA_LASI_TX_LASERBICURRFLT 0x0200 /* Laser bias current fault */ + +/* LASI control/status registers. */ +#define MDIO_PMA_LASI_LSALARM 0x0001 /* LS_ALARM enable/status */ +#define MDIO_PMA_LASI_TXALARM 0x0002 /* TX_ALARM enable/status */ +#define MDIO_PMA_LASI_RXALARM 0x0004 /* RX_ALARM enable/status */ + /* Mapping between MDIO PRTAD/DEVAD and mii_ioctl_data::phy_id */ #define MDIO_PHY_ID_C45 0x8000 -- cgit v1.2.3-70-g09d2 From 184dd3459bb334d9061b58faed3610d08d6c7ff8 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Sun, 17 May 2009 12:33:28 +0000 Subject: fcoe: adds spma mode support If we can find a type NETDEV_HW_ADDR_T_SAN mac address from the corresponding netdev for a fcoe interface then sets up added the fc->ctlr.spma flag and stores spma mode address in ctl_src_addr. In case the spma flag is set then:- 1. Adds spma mode MAC address in ctl_src_addr as secondary MAC address, the FLOGI for FIP and pre-FIP will go out using this address. 2. Cleans up stored spma MAC address in ctl_src_addr in fcoe_netdev_cleanup. 3. Sets up spma bit in fip_flags for FIP solicitations along with exiting FPMA bit setting. 4. Initialize the FLOGI FIP MAC descriptor to stored spma MAC address in ctl_src_addr. This is used as proposed FCoE MAC address from initiator along with both SPMA and FPMA bit set in FIP solicitation, in response the switch may grant any FPMA or SPMA mode MAC address to initiator. Removes FIP descriptor type checking against ELS type ELS_FLOGI in fcoe_ctlr_encaps to update a FIP MAC descriptor, instead now checks against FIP_DT_FLOGI. I've tested this with available FPMA-only FCoE switch but since data_src_addr is updated using same old code for both FPMA and SPMA modes with FIP or pre-FIP links, so added SPMA mode will work with SPMA-only switch also provided that switch grants a valid MAC address. Signed-off-by: Vasu Dev Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/scsi/fcoe/fcoe.c | 24 ++++++++++++++++++++++-- drivers/scsi/fcoe/libfcoe.c | 10 +++++++++- include/scsi/libfcoe.h | 2 ++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 04f500571d4..f2d16127bd0 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -238,6 +238,9 @@ void fcoe_netdev_cleanup(struct fcoe_softc *fc) if (!is_zero_ether_addr(fc->ctlr.data_src_addr)) dev_unicast_delete(fc->real_dev, fc->ctlr.data_src_addr, ETH_ALEN); + if (fc->ctlr.spma) + dev_unicast_delete(fc->real_dev, + fc->ctlr.ctl_src_addr, ETH_ALEN); dev_mc_delete(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0); rtnl_unlock(); } @@ -257,6 +260,7 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev) u64 wwnn, wwpn; struct fcoe_softc *fc; u8 flogi_maddr[ETH_ALEN]; + struct netdev_hw_addr *ha; /* Setup lport private data to point to fcoe softc */ fc = lport_priv(lp); @@ -313,9 +317,23 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev) skb_queue_head_init(&fc->fcoe_pending_queue); fc->fcoe_pending_queue_active = 0; + /* look for SAN MAC address, if multiple SAN MACs exist, only + * use the first one for SPMA */ + rcu_read_lock(); + for_each_dev_addr(netdev, ha) { + if ((ha->type == NETDEV_HW_ADDR_T_SAN) && + (is_valid_ether_addr(fc->ctlr.ctl_src_addr))) { + memcpy(fc->ctlr.ctl_src_addr, ha->addr, ETH_ALEN); + fc->ctlr.spma = 1; + break; + } + } + rcu_read_unlock(); + /* setup Source Mac Address */ - memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr, - fc->real_dev->addr_len); + if (!fc->ctlr.spma) + memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr, + fc->real_dev->addr_len); wwnn = fcoe_wwn_from_mac(fc->real_dev->dev_addr, 1, 0); fc_set_wwnn(lp, wwnn); @@ -331,6 +349,8 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev) rtnl_lock(); memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN); dev_unicast_add(fc->real_dev, flogi_maddr, ETH_ALEN); + if (fc->ctlr.spma) + dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr, ETH_ALEN); rtnl_unlock(); /* diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index f410f4abb54..444a06bdb70 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -198,6 +198,8 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf) sol->fip.fip_subcode = FIP_SC_SOL; sol->fip.fip_dl_len = htons(sizeof(sol->desc) / FIP_BPW); sol->fip.fip_flags = htons(FIP_FL_FPMA); + if (fip->spma) + sol->fip.fip_flags |= htons(FIP_FL_SPMA); sol->desc.mac.fd_desc.fip_dtype = FIP_DT_MAC; sol->desc.mac.fd_desc.fip_dlen = sizeof(sol->desc.mac) / FIP_BPW; @@ -350,6 +352,8 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa) kal->fip.fip_dl_len = htons((sizeof(kal->mac) + ports * sizeof(*vn)) / FIP_BPW); kal->fip.fip_flags = htons(FIP_FL_FPMA); + if (fip->spma) + kal->fip.fip_flags |= htons(FIP_FL_SPMA); kal->mac.fd_desc.fip_dtype = FIP_DT_MAC; kal->mac.fd_desc.fip_dlen = sizeof(kal->mac) / FIP_BPW; @@ -413,6 +417,8 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, cap->fip.fip_subcode = FIP_SC_REQ; cap->fip.fip_dl_len = htons((dlen + sizeof(*mac)) / FIP_BPW); cap->fip.fip_flags = htons(FIP_FL_FPMA); + if (fip->spma) + cap->fip.fip_flags |= htons(FIP_FL_SPMA); cap->encaps.fd_desc.fip_dtype = dtype; cap->encaps.fd_desc.fip_dlen = dlen / FIP_BPW; @@ -421,8 +427,10 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, memset(mac, 0, sizeof(mac)); mac->fd_desc.fip_dtype = FIP_DT_MAC; mac->fd_desc.fip_dlen = sizeof(*mac) / FIP_BPW; - if (dtype != ELS_FLOGI) + if (dtype != FIP_DT_FLOGI) memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN); + else if (fip->spma) + memcpy(mac->fd_mac, fip->ctl_src_addr, ETH_ALEN); skb->protocol = htons(ETH_P_802_3); skb_reset_mac_header(skb); diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 666cc131732..b2410605b74 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -73,6 +73,7 @@ enum fip_state { * @link: current link status for libfc. * @last_link: last link state reported to libfc. * @map_dest: use the FC_MAP mode for destination MAC addresses. + * @spma: supports SPMA server-provided MACs mode * @dest_addr: MAC address of the selected FC forwarder. * @ctl_src_addr: the native MAC address of our local port. * @data_src_addr: the assigned MAC address for the local port after FLOGI. @@ -104,6 +105,7 @@ struct fcoe_ctlr { u8 link; u8 last_link; u8 map_dest; + u8 spma; u8 dest_addr[ETH_ALEN]; u8 ctl_src_addr[ETH_ALEN]; u8 data_src_addr[ETH_ALEN]; -- cgit v1.2.3-70-g09d2 From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code after: | commit b263295dbffd33b0fbff670720fa178c30e3392a | Author: Christoph Lameter | Date: Wed Jan 30 13:30:47 2008 +0100 | | x86: 64-bit, make sparsemem vmemmap the only memory model we don't have MEMORY_HOTPLUG_RESERVE anymore. Historically, x86-64 had an architecture-specific method for memory hotplug whereby it scanned the SRAT for physical memory ranges that could be potentially used for memory hot-add later. By reserving those ranges without physical memory, the memmap would be allocated and left dormant until needed. This depended on the DISCONTIG memory model which has been removed so the code implementing HOTPLUG_RESERVE is now dead. This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE. (Changelog authored by Mel.) v2: updated changelog, and remove hotadd= in doc [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Reviewed-by: Christoph Lameter Reviewed-by: Mel Gorman Workflow-found-OK-by: Andrew Morton LKML-Reference: <4A0C4910.7090508@kernel.org> Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/boot-options.txt | 5 --- arch/x86/include/asm/numa_64.h | 3 -- arch/x86/mm/numa_64.c | 5 --- arch/x86/mm/srat_64.c | 63 ++++++---------------------- include/linux/mm.h | 2 - mm/page_alloc.c | 69 ------------------------------- 6 files changed, 12 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 34c13040a71..2db5893d6c9 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -150,11 +150,6 @@ NUMA Otherwise, the remaining system RAM is allocated to an additional node. - numa=hotadd=percent - Only allow hotadd memory to preallocate page structures upto - percent of already available memory. - numa=hotadd=0 will disable hotadd memory. - ACPI acpi=off Don't enable ACPI diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cb..7feff0648d7 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, extern void numa_init_array(void); extern int numa_off; -extern void srat_reserve_add_area(int nodeid); -extern int hotadd_percent; - extern s16 apicid_to_node[MAX_LOCAL_APIC]; extern unsigned long numa_free_all_bootmem(void); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index fb61d81a656..a6a93c39523 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<> PAGE_SHIFT; unsigned long e_pfn = end >> PAGE_SHIFT; - int ret = 0, changed = 0; + int changed = 0; struct bootnode *nd = &nodes_add[node]; /* I had some trouble with strange memory hotadd regions breaking @@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) mistakes */ if ((signed long)(end - start) < NODE_MIN_SIZE) { printk(KERN_ERR "SRAT: Hotplug area too small\n"); - return -1; + return; } /* This check might be a bit too strict, but I'm keeping it for now. */ @@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug area %lu -> %lu has existing memory\n", s_pfn, e_pfn); - return -1; - } - - if (!hotadd_enough_memory(&nodes_add[node])) { - printk(KERN_ERR "SRAT: Hotplug area too large\n"); - return -1; + return; } /* Looks good */ @@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - ret = update_end_of_memory(nd->end); - if (changed) - printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); - return ret; + printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", + nd->start, nd->end); } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start, end); e820_register_active_regions(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - push_node_boundaries(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && - (reserve_hotadd(node, start, end) < 0)) { - /* Ignore hotadd region. Undo damage */ - printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { + update_nodes_add(node, start, end); + /* restore nodes[node] */ *nd = oldnode; if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); @@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b) } #endif /* CONFIG_NUMA_EMU */ -void __init srat_reserve_add_area(int nodeid) -{ - if (found_add_area && nodes_add[nodeid].end) { - u64 total_mb; - - printk(KERN_INFO "SRAT: Reserving hot-add memory space " - "for node %d at %Lx-%Lx\n", - nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); - total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) - >> PAGE_SHIFT; - total_mb *= sizeof(struct page); - total_mb >>= 20; - printk(KERN_INFO "SRAT: This will cost you %Lu MB of " - "pre-allocated memory.\n", (unsigned long long)total_mb); - reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, - nodes_add[nodeid].end - nodes_add[nodeid].start, - BOOTMEM_DEFAULT); - } -} - int __node_distance(int a, int b) { int index; diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c..511b0986709 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fe753ecf2aa..474c7e9dd51 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve; static int __meminitdata nr_nodemap_entries; static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES]; - static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES]; -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ static unsigned long __initdata required_kernelcore; static unsigned long __initdata required_movablecore; static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; @@ -3102,64 +3098,6 @@ void __init sparse_memory_present_with_active_regions(int nid) early_node_map[i].end_pfn); } -/** - * push_node_boundaries - Push node boundaries to at least the requested boundary - * @nid: The nid of the node to push the boundary for - * @start_pfn: The start pfn of the node - * @end_pfn: The end pfn of the node - * - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd - * time. Specifically, on x86_64, SRAT will report ranges that can potentially - * be hotplugged even though no physical memory exists. This function allows - * an arch to push out the node boundaries so mem_map is allocated that can - * be used later. - */ -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering push_node_boundaries(%u, %lu, %lu)\n", - nid, start_pfn, end_pfn); - - /* Initialise the boundary for this node if necessary */ - if (node_boundary_end_pfn[nid] == 0) - node_boundary_start_pfn[nid] = -1UL; - - /* Update the boundaries */ - if (node_boundary_start_pfn[nid] > start_pfn) - node_boundary_start_pfn[nid] = start_pfn; - if (node_boundary_end_pfn[nid] < end_pfn) - node_boundary_end_pfn[nid] = end_pfn; -} - -/* If necessary, push the node boundary out for reserve hotadd */ -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering account_node_boundary(%u, %lu, %lu)\n", - nid, *start_pfn, *end_pfn); - - /* Return if boundary information has not been provided */ - if (node_boundary_end_pfn[nid] == 0) - return; - - /* Check the boundaries and update if necessary */ - if (node_boundary_start_pfn[nid] < *start_pfn) - *start_pfn = node_boundary_start_pfn[nid]; - if (node_boundary_end_pfn[nid] > *end_pfn) - *end_pfn = node_boundary_end_pfn[nid]; -} -#else -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) {} - -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) {} -#endif - - /** * get_pfn_range_for_nid - Return the start and end page frames for a node * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. @@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid, if (*start_pfn == -1UL) *start_pfn = 0; - - /* Push the node boundaries out if requested */ - account_node_boundary(nid, start_pfn, end_pfn); } /* @@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); - memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ -- cgit v1.2.3-70-g09d2 From 302b4215daa0a704c843da40fd2529e5757a72da Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:32 +0800 Subject: PCI: support the ATS capability The PCIe ATS capability makes the Endpoint be able to request the DMA address translation from the IOMMU and cache the translation in the device side, thus alleviate IOMMU pressure and improve the hardware performance in the I/O virtualization environment. Signed-off-by: Yu Zhao Acked-by: Jesse Barnes Signed-off-by: David Woodhouse --- drivers/pci/iov.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 37 +++++++++++++++++ include/linux/pci.h | 2 + include/linux/pci_regs.h | 10 +++++ 4 files changed, 154 insertions(+) (limited to 'include') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index b497daab3d4..0a7a1b40286 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -5,6 +5,7 @@ * * PCI Express I/O Virtualization (IOV) support. * Single Root IOV 1.0 + * Address Translation Service 1.0 */ #include @@ -679,3 +680,107 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev) return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; } EXPORT_SYMBOL_GPL(pci_sriov_migration); + +static int ats_alloc_one(struct pci_dev *dev, int ps) +{ + int pos; + u16 cap; + struct pci_ats *ats; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + ats = kzalloc(sizeof(*ats), GFP_KERNEL); + if (!ats) + return -ENOMEM; + + ats->pos = pos; + ats->stu = ps; + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; + dev->ats = ats; + + return 0; +} + +static void ats_free_one(struct pci_dev *dev) +{ + kfree(dev->ats); + dev->ats = NULL; +} + +/** + * pci_enable_ats - enable the ATS capability + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_ats(struct pci_dev *dev, int ps) +{ + int rc; + u16 ctrl; + + BUG_ON(dev->ats); + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + rc = ats_alloc_one(dev, ps); + if (rc) + return rc; + + ctrl = PCI_ATS_CTRL_ENABLE; + ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + return 0; +} + +/** + * pci_disable_ats - disable the ATS capability + * @dev: the PCI device + */ +void pci_disable_ats(struct pci_dev *dev) +{ + u16 ctrl; + + BUG_ON(!dev->ats); + + pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); + ctrl &= ~PCI_ATS_CTRL_ENABLE; + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + ats_free_one(dev); +} + +/** + * pci_ats_queue_depth - query the ATS Invalidate Queue Depth + * @dev: the PCI device + * + * Returns the queue depth on success, or negative on failure. + * + * The ATS spec uses 0 in the Invalidate Queue Depth field to + * indicate that the function can accept 32 Invalidate Request. + * But here we use the `real' values (i.e. 1~32) for the Queue + * Depth. + */ +int pci_ats_queue_depth(struct pci_dev *dev) +{ + int pos; + u16 cap; + + if (dev->ats) + return dev->ats->qdep; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + + return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; +} diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d03f6b99f29..3c2ec64f78e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -229,6 +229,13 @@ struct pci_sriov { u8 __iomem *mstate; /* VF Migration State Array */ }; +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ +}; + #ifdef CONFIG_PCI_IOV extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); @@ -236,6 +243,20 @@ extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. + */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return !!dev->ats; +} #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -257,6 +278,22 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ #endif /* DRIVERS_PCI_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 72698d89e76..bd3e4a798c4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -188,6 +188,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; struct pci_sriov; +struct pci_ats; /* * The pci_dev structure is used to describe PCI devices. @@ -285,6 +286,7 @@ struct pci_dev { struct pci_sriov *sriov; /* SR-IOV capability related */ struct pci_dev *physfn; /* the PF this VF is associated with */ }; + struct pci_ats *ats; /* Address Translation Service */ #endif }; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e4d08c1b2e0..c03189c56c7 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -501,6 +501,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ @@ -619,6 +620,15 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Address Translation Service */ +#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ +#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ +#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ +#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ +#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ +#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ + /* Single Root I/O Virtualization */ #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ #define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ -- cgit v1.2.3-70-g09d2 From 1cde26f928863d90e9e7c1217880c8450464d305 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 May 2009 14:41:30 +0200 Subject: virtio_blk: SG_IO passthru support Add support for SG_IO passthru to virtio_blk. We add the scsi command block after the normal outhdr, and the scsi inhdr with full status information aswell as the sense buffer before the regular inhdr. [hch: forward ported, added the VIRTIO_BLK_F_SCSI flags, some comments and tested the whole beast] [axboe: updated to use ->resid and not dual-path the byte count] Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Rusty Russell (+ checkpatch.pl tweak) Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 64 ++++++++++++++++++++++++++++++++++++---------- include/linux/virtio_blk.h | 8 ++++++ 2 files changed, 58 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dfe9ee5f169..62275dbdf2e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -37,6 +37,7 @@ struct virtblk_req struct list_head list; struct request *req; struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; u8 status; }; @@ -49,7 +50,9 @@ static void blk_done(struct virtqueue *vq) spin_lock_irqsave(&vblk->lock, flags); while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { + unsigned int nr_bytes; int error; + switch (vbr->status) { case VIRTIO_BLK_S_OK: error = 0; @@ -62,6 +65,12 @@ static void blk_done(struct virtqueue *vq) break; } + if (blk_pc_request(vbr->req)) { + vbr->req->resid_len = vbr->in_hdr.residual; + vbr->req->sense_len = vbr->in_hdr.sense_len; + vbr->req->errors = vbr->in_hdr.errors; + } + __blk_end_request_all(vbr->req, error); list_del(&vbr->list); mempool_free(vbr, vblk->pool); @@ -74,7 +83,7 @@ static void blk_done(struct virtqueue *vq) static bool do_req(struct request_queue *q, struct virtio_blk *vblk, struct request *req) { - unsigned long num, out, in; + unsigned long num, out = 0, in = 0; struct virtblk_req *vbr; vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); @@ -99,18 +108,36 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_barrier_rq(vbr->req)) vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; - sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr)); - num = blk_rq_map_sg(q, vbr->req, vblk->sg+1); - sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status)); + sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); - if (rq_data_dir(vbr->req) == WRITE) { - vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; - out = 1 + num; - in = 1; - } else { - vbr->out_hdr.type |= VIRTIO_BLK_T_IN; - out = 1; - in = 1 + num; + /* + * If this is a packet command we need a couple of additional headers. + * Behind the normal outhdr we put a segment with the scsi command + * block, and before the normal inhdr we put the sense data and the + * inhdr with additional status information before the normal inhdr. + */ + if (blk_pc_request(vbr->req)) + sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len); + + num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); + + if (blk_pc_request(vbr->req)) { + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, + sizeof(vbr->in_hdr)); + } + + sg_set_buf(&vblk->sg[num + out + in++], &vbr->status, + sizeof(vbr->status)); + + if (num) { + if (rq_data_dir(vbr->req) == WRITE) { + vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; + out += num; + } else { + vbr->out_hdr.type |= VIRTIO_BLK_T_IN; + in += num; + } } if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) { @@ -148,8 +175,16 @@ static void do_virtblk_request(struct request_queue *q) static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long data) { - return scsi_cmd_ioctl(bdev->bd_disk->queue, - bdev->bd_disk, mode, cmd, + struct gendisk *disk = bdev->bd_disk; + struct virtio_blk *vblk = disk->private_data; + + /* + * Only allow the generic SCSI ioctls if the host can support it. + */ + if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) + return -ENOIOCTLCMD; + + return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, (void __user *)data); } @@ -356,6 +391,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, + VIRTIO_BLK_F_SCSI, }; static struct virtio_driver virtio_blk = { diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 94c56d29869..4dbcbc1c348 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -15,6 +15,7 @@ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ struct virtio_blk_config { @@ -55,6 +56,13 @@ struct virtio_blk_outhdr __u64 sector; }; +struct virtio_scsi_inhdr { + __u32 errors; + __u32 data_len; + __u32 sense_len; + __u32 residual; +}; + /* And this is the final byte of the write scatter-gather list. */ #define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_IOERR 1 -- cgit v1.2.3-70-g09d2 From aa5d2b515b6fca5f8a56eac84f7fa0a68c1ce9b7 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:34 +0800 Subject: VT-d: parse ATSR in DMA Remapping Reporting Structure Parse the Root Port ATS Capability Reporting Structure in the DMA Remapping Reporting Structure ACPI table. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 112 +++++++++++++++++++++++++++++++++++++++++--- include/linux/dmar.h | 9 ++++ include/linux/intel-iommu.h | 1 + 3 files changed, 116 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f23460a5d10..6d7f9619b8a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -267,6 +267,84 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } + +static LIST_HEAD(dmar_atsr_units); + +static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = kzalloc(sizeof(*atsru), GFP_KERNEL); + if (!atsru) + return -ENOMEM; + + atsru->hdr = hdr; + atsru->include_all = atsr->flags & 0x1; + + list_add(&atsru->list, &dmar_atsr_units); + + return 0; +} + +static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +{ + int rc; + struct acpi_dmar_atsr *atsr; + + if (atsru->include_all) + return 0; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + rc = dmar_parse_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt, &atsru->devices, + atsr->segment); + if (rc || !atsru->devices_cnt) { + list_del(&atsru->list); + kfree(atsru); + } + + return rc; +} + +int dmar_find_matched_atsr_unit(struct pci_dev *dev) +{ + int i; + struct pci_bus *bus; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment == pci_domain_nr(dev->bus)) + goto found; + } + + return 0; + +found: + for (bus = dev->bus; bus; bus = bus->parent) { + struct pci_dev *bridge = bus->self; + + if (!bridge || !bridge->is_pcie || + bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + + if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { + for (i = 0; i < atsru->devices_cnt; i++) + if (atsru->devices[i] == bridge) + return 1; + break; + } + } + + if (atsru->include_all) + return 1; + + return 0; +} #endif static void __init @@ -274,22 +352,28 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { struct acpi_dmar_hardware_unit *drhd; struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_atsr *atsr; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: - drhd = (struct acpi_dmar_hardware_unit *)header; + drhd = container_of(header, struct acpi_dmar_hardware_unit, + header); printk (KERN_INFO PREFIX - "DRHD (flags: 0x%08x)base: 0x%016Lx\n", - drhd->flags, (unsigned long long)drhd->address); + "DRHD base: %#016Lx flags: %#x\n", + (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: - rmrr = (struct acpi_dmar_reserved_memory *)header; - + rmrr = container_of(header, struct acpi_dmar_reserved_memory, + header); printk (KERN_INFO PREFIX - "RMRR base: 0x%016Lx end: 0x%016Lx\n", + "RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; + case ACPI_DMAR_TYPE_ATSR: + atsr = container_of(header, struct acpi_dmar_atsr, header); + printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + break; } } @@ -361,6 +445,11 @@ parse_dmar_table(void) case ACPI_DMAR_TYPE_RESERVED_MEMORY: #ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); +#endif + break; + case ACPI_DMAR_TYPE_ATSR: +#ifdef CONFIG_DMAR + ret = dmar_parse_one_atsr(entry_header); #endif break; default: @@ -431,11 +520,19 @@ int __init dmar_dev_scope_init(void) #ifdef CONFIG_DMAR { struct dmar_rmrr_unit *rmrr, *rmrr_n; + struct dmar_atsr_unit *atsr, *atsr_n; + list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { ret = rmrr_parse_dev(rmrr); if (ret) return ret; } + + list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { + ret = atsr_parse_dev(atsr); + if (ret) + return ret; + } } #endif @@ -468,6 +565,9 @@ int __init dmar_table_init(void) #ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); + + if (list_empty(&dmar_atsr_units)) + printk(KERN_INFO PREFIX "No ATSR found\n"); #endif #ifdef CONFIG_INTR_REMAP diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc342cd..7c9a207e5da 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -185,6 +185,15 @@ struct dmar_rmrr_unit { #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +struct dmar_atsr_unit { + struct list_head list; /* list of ATSR units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct pci_dev **devices; /* target devices */ + int devices_cnt; /* target device count */ + u8 include_all:1; /* include all ports */ +}; + /* Intel DMAR initialization functions */ extern int intel_iommu_init(void); #else diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 29e05a034c0..0a1939f200f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -331,6 +331,7 @@ static inline void __iommu_flush_cache( } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); +extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From 6ba6c3a4cacfd68bf970e3e04e2ff0d66fa0f695 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:35 +0800 Subject: VT-d: add device IOTLB invalidation support Support device IOTLB invalidation to flush the translation cached in the Endpoint. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 77 ++++++++++++++++++++++++++++++++++++++++----- include/linux/intel-iommu.h | 14 ++++++++- 2 files changed, 82 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 6d7f9619b8a..7b287cb38b7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -699,7 +699,8 @@ void free_iommu(struct intel_iommu *iommu) */ static inline void reclaim_free_desc(struct q_inval *qi) { - while (qi->desc_status[qi->free_tail] == QI_DONE) { + while (qi->desc_status[qi->free_tail] == QI_DONE || + qi->desc_status[qi->free_tail] == QI_ABORT) { qi->desc_status[qi->free_tail] = QI_FREE; qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; qi->free_cnt++; @@ -709,10 +710,13 @@ static inline void reclaim_free_desc(struct q_inval *qi) static int qi_check_fault(struct intel_iommu *iommu, int index) { u32 fault; - int head; + int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + fault = readl(iommu->reg + DMAR_FSTS_REG); /* @@ -722,7 +726,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> 4) == index) { + if ((head >> DMAR_IQ_SHIFT) == index) { + printk(KERN_ERR "VT-d detected invalid descriptor: " + "low=%llx, high=%llx\n", + (unsigned long long)qi->desc[index].low, + (unsigned long long)qi->desc[index].high); memcpy(&qi->desc[index], &qi->desc[wait_index], sizeof(struct qi_desc)); __iommu_flush_cache(iommu, &qi->desc[index], @@ -732,6 +740,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) } } + /* + * If ITE happens, all pending wait_desc commands are aborted. + * No new descriptors are fetched until the ITE is cleared. + */ + if (fault & DMA_FSTS_ITE) { + head = readl(iommu->reg + DMAR_IQH_REG); + head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head |= 1; + tail = readl(iommu->reg + DMAR_IQT_REG); + tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + + do { + if (qi->desc_status[head] == QI_IN_USE) + qi->desc_status[head] = QI_ABORT; + head = (head - 2 + QI_LENGTH) % QI_LENGTH; + } while (head != tail); + + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + } + + if (fault & DMA_FSTS_ICE) + writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + return 0; } @@ -741,7 +775,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { - int rc = 0; + int rc; struct q_inval *qi = iommu->qi; struct qi_desc *hw, wait_desc; int wait_index, index; @@ -752,6 +786,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) hw = qi->desc; +restart: + rc = 0; + spin_lock_irqsave(&qi->q_lock, flags); while (qi->free_cnt < 3) { spin_unlock_irqrestore(&qi->q_lock, flags); @@ -782,7 +819,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * update the HW tail register indicating the presence of * new descriptors. */ - writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -794,18 +831,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) */ rc = qi_check_fault(iommu, index); if (rc) - goto out; + break; spin_unlock(&qi->q_lock); cpu_relax(); spin_lock(&qi->q_lock); } -out: - qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; + + qi->desc_status[index] = QI_DONE; reclaim_free_desc(qi); spin_unlock_irqrestore(&qi->q_lock, flags); + if (rc == -EAGAIN) + goto restart; + return rc; } @@ -857,6 +897,27 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask) +{ + struct qi_desc desc; + + if (mask) { + BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else + desc.high = QI_DEV_IOTLB_ADDR(addr); + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE; + + qi_submit_sync(&desc, iommu); +} + /* * Disable Queued Invalidation interface. */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0a1939f200f..40561b224a1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -53,6 +53,7 @@ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ #define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ @@ -198,6 +199,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) #define DMA_FSTS_IQE (1 << 4) +#define DMA_FSTS_ICE (1 << 5) +#define DMA_FSTS_ITE (1 << 6) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ @@ -226,7 +229,8 @@ do { \ enum { QI_FREE, QI_IN_USE, - QI_DONE + QI_DONE, + QI_ABORT }; #define QI_CC_TYPE 0x1 @@ -255,6 +259,12 @@ enum { #define QI_CC_DID(did) (((u64)did) << 16) #define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) +#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) +#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_SIZE 1 +#define QI_DEV_IOTLB_MAX_INVS 32 + struct qi_desc { u64 low, high; }; @@ -344,6 +354,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From 93a23a7271dfb811b3adb72779054c3a24433112 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:37 +0800 Subject: VT-d: support the device IOTLB Enable the device IOTLB (i.e. ATS) for both the bare metal and KVM environments. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 109 ++++++++++++++++++++++++++++++++++++++---- include/linux/dma_remapping.h | 1 + include/linux/intel-iommu.h | 1 + 3 files changed, 102 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6d7cb84c63e..c3cdfc90c13 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -252,6 +252,7 @@ struct device_domain_info { u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -945,6 +946,77 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, (unsigned long long)DMA_TLB_IAIG(val)); } +static struct device_domain_info *iommu_support_dev_iotlb( + struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +{ + int found = 0; + unsigned long flags; + struct device_domain_info *info; + struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + + if (!ecap_dev_iotlb_support(iommu->ecap)) + return NULL; + + if (!iommu->qi) + return NULL; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) + if (info->bus == bus && info->devfn == devfn) { + found = 1; + break; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (!found || !info->dev) + return NULL; + + if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) + return NULL; + + if (!dmar_find_matched_atsr_unit(info->dev)) + return NULL; + + info->iommu = iommu; + + return info; +} + +static void iommu_enable_dev_iotlb(struct device_domain_info *info) +{ + if (!info) + return; + + pci_enable_ats(info->dev, VTD_PAGE_SHIFT); +} + +static void iommu_disable_dev_iotlb(struct device_domain_info *info) +{ + if (!info->dev || !pci_ats_enabled(info->dev)) + return; + + pci_disable_ats(info->dev); +} + +static void iommu_flush_dev_iotlb(struct dmar_domain *domain, + u64 addr, unsigned mask) +{ + u16 sid, qdep; + unsigned long flags; + struct device_domain_info *info; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->dev || !pci_ats_enabled(info->dev)) + continue; + + sid = info->bus << 8 | info->devfn; + qdep = pci_ats_queue_depth(info->dev); + qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages) { @@ -965,6 +1037,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, else iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH); + if (did) + iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1305,6 +1379,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, unsigned long ndomains; int id; int agaw; + struct device_domain_info *info = NULL; pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1372,15 +1447,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_domain_id(context, id); + if (translation != CONTEXT_TT_PASS_THROUGH) { + info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + translation = info ? CONTEXT_TT_DEV_IOTLB : + CONTEXT_TT_MULTI_LEVEL; + } /* * In pass through mode, AW must be programmed to indicate the largest * AGAW value supported by hardware. And ASR is ignored by hardware. */ - if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) { - context_set_address_width(context, iommu->agaw); - context_set_address_root(context, virt_to_phys(pgd)); - } else + if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) context_set_address_width(context, iommu->msagaw); + else { + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, iommu->agaw); + } context_set_translation_type(context, translation); context_set_fault_enable(context); @@ -1402,6 +1483,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } else { iommu_flush_write_buffer(iommu); } + iommu_enable_dev_iotlb(info); spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1552,6 +1634,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); @@ -2259,10 +2342,16 @@ static void flush_unmaps(void) continue; iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); for (j = 0; j < deferred_flush[i].next; j++) { - __free_iova(&deferred_flush[i].domain[j]->iovad, - deferred_flush[i].iova[j]); + unsigned long mask; + struct iova *iova = deferred_flush[i].iova[j]; + + mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; + mask = ilog2(mask >> VTD_PAGE_SHIFT); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + iova->pfn_lo << PAGE_SHIFT, mask); + __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; } @@ -2943,6 +3032,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, pdev); free_devinfo_mem(info); @@ -2993,6 +3083,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, info->dev); @@ -3197,11 +3288,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = vm_domain_add_dev_info(dmar_domain, pdev); if (ret) return ret; - ret = vm_domain_add_dev_info(dmar_domain, pdev); + ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); return ret; } diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index e0a03aff63d..5619f852273 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -14,6 +14,7 @@ #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 struct intel_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 40561b224a1..482dc91fd53 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -124,6 +124,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_pass_through(e) ((e >> 6) & 0x1) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ -- cgit v1.2.3-70-g09d2 From 6d3ddc81f5762d54ce7d1db70eb757c6c12fabbc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 16 May 2009 17:47:29 +0100 Subject: ASoC: Split DAPM power checks from sequencing of power changes DAPM has always applied any changes to the power state of widgets as soon as it has determined that they are required. Instead of doing this store all the changes that are required on lists of widgets to power up and down, then iterate over those lists and apply the changes. This changes the sequence in which changes are implemented, doing all power downs before power ups and always using the up/down sequences (previously they were only used when changes were due to DAC/ADC power events). The error handling is also changed so that we continue attempting to power widgets if some changes fail. The main benefit of this is to allow future changes to do optimisations over the whole power sequence and to reduce the number of walks of the widget graph required to check the power status of widgets. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++ include/sound/soc.h | 2 ++ sound/soc/soc-dapm.c | 81 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 61 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 533f9f25649..b3f789d0cee 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -385,6 +385,9 @@ struct snd_soc_dapm_widget { /* widget input and outputs */ struct list_head sources; struct list_head sinks; + + /* used during DAPM updates */ + struct list_head power_list; }; #endif diff --git a/include/sound/soc.h b/include/sound/soc.h index 6ab80bf7abd..8309ce81cf3 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -372,6 +372,8 @@ struct snd_soc_codec { enum snd_soc_bias_level bias_level; enum snd_soc_bias_level suspend_bias_level; struct delayed_work delayed_work; + struct list_head up_list; + struct list_head down_list; /* codec DAI's */ struct snd_soc_dai *dai; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7847f80e96d..04ef84106d7 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -658,7 +658,7 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w) static int dapm_power_widget(struct snd_soc_codec *codec, int event, struct snd_soc_dapm_widget *w) { - int power, ret; + int ret; switch (w->id) { case snd_soc_dapm_pre: @@ -696,18 +696,8 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, return 0; default: - break; + return dapm_generic_apply_power(w); } - - if (!w->power_check) - return 0; - - power = w->power_check(w); - if (w->power == power) - return 0; - w->power = power; - - return dapm_generic_apply_power(w); } /* @@ -722,27 +712,68 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, static int dapm_power_widgets(struct snd_soc_codec *codec, int event) { struct snd_soc_dapm_widget *w; - int i, c = 1, *seq = NULL, ret = 0; - - /* do we have a sequenced stream event */ - if (event == SND_SOC_DAPM_STREAM_START) { - c = ARRAY_SIZE(dapm_up_seq); - seq = dapm_up_seq; - } else if (event == SND_SOC_DAPM_STREAM_STOP) { - c = ARRAY_SIZE(dapm_down_seq); - seq = dapm_down_seq; + int ret = 0; + int i, power; + + INIT_LIST_HEAD(&codec->up_list); + INIT_LIST_HEAD(&codec->down_list); + + /* Check which widgets we need to power and store them in + * lists indicating if they should be powered up or down. + */ + list_for_each_entry(w, &codec->dapm_widgets, list) { + switch (w->id) { + case snd_soc_dapm_pre: + list_add_tail(&codec->down_list, &w->power_list); + break; + case snd_soc_dapm_post: + list_add_tail(&codec->up_list, &w->power_list); + break; + + default: + if (!w->power_check) + continue; + + power = w->power_check(w); + if (w->power == power) + continue; + + if (power) + list_add_tail(&w->power_list, &codec->up_list); + else + list_add_tail(&w->power_list, + &codec->down_list); + + w->power = power; + break; + } } - for (i = 0; i < c; i++) { - list_for_each_entry(w, &codec->dapm_widgets, list) { + /* Power down widgets first; try to avoid amplifying pops. */ + for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) { + list_for_each_entry(w, &codec->down_list, power_list) { + /* is widget in stream order */ + if (w->id != dapm_down_seq[i]) + continue; + + ret = dapm_power_widget(codec, event, w); + if (ret != 0) + pr_err("Failed to power down %s: %d\n", + w->name, ret); + } + } + /* Now power up. */ + for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++) { + list_for_each_entry(w, &codec->up_list, power_list) { /* is widget in stream order */ - if (seq && seq[i] && w->id != seq[i]) + if (w->id != dapm_up_seq[i]) continue; ret = dapm_power_widget(codec, event, w); if (ret != 0) - return ret; + pr_err("Failed to power up %s: %d\n", + w->name, ret); } } -- cgit v1.2.3-70-g09d2 From 452c5eaa0d5162e02ffee742ea17540887bc2904 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 17 May 2009 21:41:23 +0100 Subject: ASoC: Integrate bias management with DAPM power management Rather than managing the bias level of the system based on if there is an active audio stream manage it based on there being an active DAPM widget. This simplifies the code a little, moving the power handling into one place, and improves audio performance for bypass paths when no playbacks or captures are active. Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 -- include/sound/soc.h | 1 + sound/soc/soc-core.c | 61 +++++++------------------------------ sound/soc/soc-dapm.c | 78 +++++++++++++++++++++++++++++++++--------------- 4 files changed, 65 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index b3f789d0cee..ec8a45f9a06 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -279,8 +279,6 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, int event); -int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, - enum snd_soc_bias_level level); /* dapm sys fs - used by the core */ int snd_soc_dapm_sys_add(struct device *dev); diff --git a/include/sound/soc.h b/include/sound/soc.h index 8309ce81cf3..2af3213df90 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -339,6 +339,7 @@ struct snd_soc_codec { struct module *owner; struct mutex mutex; struct device *dev; + struct snd_soc_device *socdev; struct list_head list; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0e706645ec..4aa8e2d3506 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -299,7 +299,6 @@ static void close_delayed_work(struct work_struct *work) { struct snd_soc_card *card = container_of(work, struct snd_soc_card, delayed_work.work); - struct snd_soc_device *socdev = card->socdev; struct snd_soc_codec *codec = card->codec; struct snd_soc_dai *codec_dai; int i; @@ -315,27 +314,10 @@ static void close_delayed_work(struct work_struct *work) /* are we waiting on this codec DAI stream */ if (codec_dai->pop_wait == 1) { - - /* Reduce power if no longer active */ - if (codec->active == 0) { - pr_debug("pop wq D1 %s %s\n", codec->name, - codec_dai->playback.stream_name); - snd_soc_dapm_set_bias_level(socdev, - SND_SOC_BIAS_PREPARE); - } - codec_dai->pop_wait = 0; snd_soc_dapm_stream_event(codec, codec_dai->playback.stream_name, SND_SOC_DAPM_STREAM_STOP); - - /* Fall into standby if no longer active */ - if (codec->active == 0) { - pr_debug("pop wq D3 %s %s\n", codec->name, - codec_dai->playback.stream_name); - snd_soc_dapm_set_bias_level(socdev, - SND_SOC_BIAS_STANDBY); - } } } mutex_unlock(&pcm_mutex); @@ -399,10 +381,6 @@ static int soc_codec_close(struct snd_pcm_substream *substream) snd_soc_dapm_stream_event(codec, codec_dai->capture.stream_name, SND_SOC_DAPM_STREAM_STOP); - - if (codec->active == 0 && codec_dai->pop_wait == 0) - snd_soc_dapm_set_bias_level(socdev, - SND_SOC_BIAS_STANDBY); } mutex_unlock(&pcm_mutex); @@ -467,36 +445,16 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) cancel_delayed_work(&card->delayed_work); } - /* do we need to power up codec */ - if (codec->bias_level != SND_SOC_BIAS_ON) { - snd_soc_dapm_set_bias_level(socdev, - SND_SOC_BIAS_PREPARE); - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_soc_dapm_stream_event(codec, - codec_dai->playback.stream_name, - SND_SOC_DAPM_STREAM_START); - else - snd_soc_dapm_stream_event(codec, - codec_dai->capture.stream_name, - SND_SOC_DAPM_STREAM_START); - - snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON); - snd_soc_dai_digital_mute(codec_dai, 0); - - } else { - /* codec already powered - power on widgets */ - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_soc_dapm_stream_event(codec, - codec_dai->playback.stream_name, - SND_SOC_DAPM_STREAM_START); - else - snd_soc_dapm_stream_event(codec, - codec_dai->capture.stream_name, - SND_SOC_DAPM_STREAM_START); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + snd_soc_dapm_stream_event(codec, + codec_dai->playback.stream_name, + SND_SOC_DAPM_STREAM_START); + else + snd_soc_dapm_stream_event(codec, + codec_dai->capture.stream_name, + SND_SOC_DAPM_STREAM_START); - snd_soc_dai_digital_mute(codec_dai, 0); - } + snd_soc_dai_digital_mute(codec_dai, 0); out: mutex_unlock(&pcm_mutex); @@ -1372,6 +1330,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) return ret; } + codec->socdev = socdev; codec->card->dev = socdev->dev; codec->card->private_data = codec; strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver)); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d130602b307..4ca5e56388a 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -94,6 +94,30 @@ static inline struct snd_soc_dapm_widget *dapm_cnew_widget( return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL); } +/** + * snd_soc_dapm_set_bias_level - set the bias level for the system + * @socdev: audio device + * @level: level to configure + * + * Configure the bias (power) levels for the SoC audio device. + * + * Returns 0 for success else error. + */ +static int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, + enum snd_soc_bias_level level) +{ + struct snd_soc_card *card = socdev->card; + struct snd_soc_codec *codec = socdev->card->codec; + int ret = 0; + + if (card->set_bias_level) + ret = card->set_bias_level(card, level); + if (ret == 0 && codec->set_bias_level) + ret = codec->set_bias_level(codec, level); + + return ret; +} + /* set up initial codec paths */ static void dapm_set_path_status(struct snd_soc_dapm_widget *w, struct snd_soc_dapm_path *p, int i) @@ -707,9 +731,11 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, */ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) { + struct snd_soc_device *socdev = codec->socdev; struct snd_soc_dapm_widget *w; int ret = 0; int i, power; + int sys_power = 0; INIT_LIST_HEAD(&codec->up_list); INIT_LIST_HEAD(&codec->down_list); @@ -731,6 +757,9 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) continue; power = w->power_check(w); + if (power) + sys_power = 1; + if (w->power == power) continue; @@ -745,6 +774,15 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) } } + /* If we're changing to all on or all off then prepare */ + if ((sys_power && codec->bias_level == SND_SOC_BIAS_STANDBY) || + (!sys_power && codec->bias_level == SND_SOC_BIAS_ON)) { + ret = snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_PREPARE); + if (ret != 0) + pr_err("Failed to prepare bias: %d\n", ret); + } + /* Power down widgets first; try to avoid amplifying pops. */ for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) { list_for_each_entry(w, &codec->down_list, power_list) { @@ -773,6 +811,22 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) } } + /* If we just powered the last thing off drop to standby bias */ + if (codec->bias_level == SND_SOC_BIAS_PREPARE && !sys_power) { + ret = snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_STANDBY); + if (ret != 0) + pr_err("Failed to apply standby bias: %d\n", ret); + } + + /* If we just powered up then move to active bias */ + if (codec->bias_level == SND_SOC_BIAS_PREPARE && sys_power) { + ret = snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_ON); + if (ret != 0) + pr_err("Failed to apply active bias: %d\n", ret); + } + return 0; } @@ -1720,30 +1774,6 @@ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, } EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event); -/** - * snd_soc_dapm_set_bias_level - set the bias level for the system - * @socdev: audio device - * @level: level to configure - * - * Configure the bias (power) levels for the SoC audio device. - * - * Returns 0 for success else error. - */ -int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, - enum snd_soc_bias_level level) -{ - struct snd_soc_card *card = socdev->card; - struct snd_soc_codec *codec = socdev->card->codec; - int ret = 0; - - if (card->set_bias_level) - ret = card->set_bias_level(card, level); - if (ret == 0 && codec->set_bias_level) - ret = codec->set_bias_level(codec, level); - - return ret; -} - /** * snd_soc_dapm_enable_pin - enable pin. * @codec: SoC codec -- cgit v1.2.3-70-g09d2 From 7004bf252c53da18f6b55103e0c92f777f846806 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 00:34:33 +0000 Subject: net: add tx_packets/tx_bytes/tx_dropped counters in struct netdev_queue offsetof(struct net_device, features)=0x44 offsetof(struct net_device, stats.tx_packets)=0x54 offsetof(struct net_device, stats.tx_bytes)=0x5c offsetof(struct net_device, stats.tx_dropped)=0x6c Network drivers that touch dev->stats.tx_packets/stats.tx_bytes in their tx path can slow down SMP operations, since they dirty a cache line that should stay shared (dev->features is needed in rx and tx paths) We could move away stats field in net_device but it wont help that much. (Two cache lines dirtied in tx path, we can do one only) Better solution is to add tx_packets/tx_bytes/tx_dropped in struct netdev_queue because this structure is already touched in tx path and counters updates will then be free (no increase in size) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd547d04a8c..f8574e76b74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -474,6 +474,9 @@ struct netdev_queue { * please use this field instead of dev->trans_start */ unsigned long trans_start; + unsigned long tx_bytes; + unsigned long tx_packets; + unsigned long tx_dropped; } ____cacheline_aligned_in_smp; diff --git a/net/core/dev.c b/net/core/dev.c index 14dd725aaab..6d3630d1627 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4943,13 +4943,30 @@ void netdev_run_todo(void) * the internal statistics structure is used. */ const struct net_device_stats *dev_get_stats(struct net_device *dev) - { +{ const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else - return &dev->stats; + else { + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + struct net_device_stats *stats = &dev->stats; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } + return stats; + } } EXPORT_SYMBOL(dev_get_stats); -- cgit v1.2.3-70-g09d2 From 75834fc3b6fcff00327f5d2a18760c1e8e0179c5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 18 May 2009 10:26:10 -0400 Subject: SELinux: move SELINUX_MAGIC into magic.h The selinuxfs superblock magic is used inside the IMA code, but is being defined in two places and could someday get out of sync. This patch moves the declaration into magic.h so it is only done once. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/magic.h | 1 + security/integrity/ima/ima_policy.c | 8 +++----- security/selinux/include/security.h | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/magic.h b/include/linux/magic.h index 5b4e28bcb78..927138cf305 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -9,6 +9,7 @@ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 +#define SELINUX_MAGIC 0xf97cff8c #define TMPFS_MAGIC 0x01021994 #define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index dec6dcb1c8d..31d677f7c65 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -49,14 +49,12 @@ struct ima_measure_rule_entry { * written in terms of .action, .func, .mask, .fsmagic, and .uid */ static struct ima_measure_rule_entry default_rules[] = { - {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC, - .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC, - .flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC}, {.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC, diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index a7be3f01fb0..ca835795a8b 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -8,14 +8,13 @@ #ifndef _SELINUX_SECURITY_H_ #define _SELINUX_SECURITY_H_ +#include #include "flask.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ #define SECSID_WILD 0xffffffff /* wildcard SID */ #define SECCLASS_NULL 0x0000 /* no class */ -#define SELINUX_MAGIC 0xf97cff8c - /* Identify specific policy version changes */ #define POLICYDB_VERSION_BASE 15 #define POLICYDB_VERSION_BOOL 16 -- cgit v1.2.3-70-g09d2 From 39549eef3587f1c1e8c65c88a2400d10fd30ea17 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Fri, 15 May 2009 23:39:29 +0000 Subject: can: CAN Network device driver and Netlink interface The CAN network device driver interface provides a generic interface to setup, configure and monitor CAN network devices. It exports a set of common data structures and functions, which all real CAN network device drivers should use. Please have a look to the SJA1000 or MSCAN driver to understand how to use them. The name of the module is can-dev.ko. Furthermore, it adds a Netlink interface allowing to configure the CAN device using the program "ip" from the iproute2 utility suite. For further information please check "Documentation/networking/can.txt" Signed-off-by: Wolfgang Grandegger Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 23 ++ drivers/net/can/Makefile | 5 + drivers/net/can/dev.c | 657 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/can/Kbuild | 1 + include/linux/can/dev.h | 70 +++++ include/linux/can/netlink.h | 113 ++++++++ 6 files changed, 869 insertions(+) create mode 100644 drivers/net/can/dev.c create mode 100644 include/linux/can/dev.h create mode 100644 include/linux/can/netlink.h (limited to 'include') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 57def0d5737..77adb8ef6e4 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -12,6 +12,29 @@ config CAN_VCAN This driver can also be built as a module. If so, the module will be called vcan. +config CAN_DEV + tristate "Platform CAN drivers with Netlink support" + depends on CAN + default Y + ---help--- + Enables the common framework for platform CAN drivers with Netlink + support. This is the standard library for CAN drivers. + If unsure, say Y. + +config CAN_CALC_BITTIMING + bool "CAN bit-timing calculation" + depends on CAN_DEV + default Y + ---help--- + If enabled, CAN bit-timing parameters will be calculated for the + bit-rate specified via Netlink argument "bitrate" when the device + get started. This works fine for the most common CAN controllers + with standard bit-rates but may fail for exotic bit-rates or CAN + source clock frequencies. Disabling saves some space, but then the + bit-timing parameters must be specified directly using the Netlink + arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw". + If unsure, say Y. + config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" depends on CAN diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index c4bead705cd..6c865475cd8 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -3,3 +3,8 @@ # obj-$(CONFIG_CAN_VCAN) += vcan.o + +obj-$(CONFIG_CAN_DEV) += can-dev.o +can-dev-y := dev.o + +ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c new file mode 100644 index 00000000000..52b0e7d8901 --- /dev/null +++ b/drivers/net/can/dev.c @@ -0,0 +1,657 @@ +/* + * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix + * Copyright (C) 2006 Andrey Volkov, Varma Electronics + * Copyright (C) 2008-2009 Wolfgang Grandegger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MOD_DESC "CAN device driver interface" + +MODULE_DESCRIPTION(MOD_DESC); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Wolfgang Grandegger "); + +#ifdef CONFIG_CAN_CALC_BITTIMING +#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ + +/* + * Bit-timing calculation derived from: + * + * Code based on LinCAN sources and H8S2638 project + * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz + * Copyright 2005 Stanislav Marek + * email: pisa@cmp.felk.cvut.cz + * + * Calculates proper bit-timing parameters for a specified bit-rate + * and sample-point, which can then be used to set the bit-timing + * registers of the CAN controller. You can find more information + * in the header file linux/can/netlink.h. + */ +static int can_update_spt(const struct can_bittiming_const *btc, + int sampl_pt, int tseg, int *tseg1, int *tseg2) +{ + *tseg2 = tseg + 1 - (sampl_pt * (tseg + 1)) / 1000; + if (*tseg2 < btc->tseg2_min) + *tseg2 = btc->tseg2_min; + if (*tseg2 > btc->tseg2_max) + *tseg2 = btc->tseg2_max; + *tseg1 = tseg - *tseg2; + if (*tseg1 > btc->tseg1_max) { + *tseg1 = btc->tseg1_max; + *tseg2 = tseg - *tseg1; + } + return 1000 * (tseg + 1 - *tseg2) / (tseg + 1); +} + +static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt) +{ + struct can_priv *priv = netdev_priv(dev); + const struct can_bittiming_const *btc = priv->bittiming_const; + long rate, best_rate = 0; + long best_error = 1000000000, error = 0; + int best_tseg = 0, best_brp = 0, brp = 0; + int tsegall, tseg = 0, tseg1 = 0, tseg2 = 0; + int spt_error = 1000, spt = 0, sampl_pt; + u64 v64; + + if (!priv->bittiming_const) + return -ENOTSUPP; + + /* Use CIA recommended sample points */ + if (bt->sample_point) { + sampl_pt = bt->sample_point; + } else { + if (bt->bitrate > 800000) + sampl_pt = 750; + else if (bt->bitrate > 500000) + sampl_pt = 800; + else + sampl_pt = 875; + } + + /* tseg even = round down, odd = round up */ + for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; + tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { + tsegall = 1 + tseg / 2; + /* Compute all possible tseg choices (tseg=tseg1+tseg2) */ + brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2; + /* chose brp step which is possible in system */ + brp = (brp / btc->brp_inc) * btc->brp_inc; + if ((brp < btc->brp_min) || (brp > btc->brp_max)) + continue; + rate = priv->clock.freq / (brp * tsegall); + error = bt->bitrate - rate; + /* tseg brp biterror */ + if (error < 0) + error = -error; + if (error > best_error) + continue; + best_error = error; + if (error == 0) { + spt = can_update_spt(btc, sampl_pt, tseg / 2, + &tseg1, &tseg2); + error = sampl_pt - spt; + if (error < 0) + error = -error; + if (error > spt_error) + continue; + spt_error = error; + } + best_tseg = tseg / 2; + best_brp = brp; + best_rate = rate; + if (error == 0) + break; + } + + if (best_error) { + /* Error in one-tenth of a percent */ + error = (best_error * 1000) / bt->bitrate; + if (error > CAN_CALC_MAX_ERROR) { + dev_err(dev->dev.parent, + "bitrate error %ld.%ld%% too high\n", + error / 10, error % 10); + return -EDOM; + } else { + dev_warn(dev->dev.parent, "bitrate error %ld.%ld%%\n", + error / 10, error % 10); + } + } + + /* real sample point */ + bt->sample_point = can_update_spt(btc, sampl_pt, best_tseg, + &tseg1, &tseg2); + + v64 = (u64)best_brp * 1000000000UL; + do_div(v64, priv->clock.freq); + bt->tq = (u32)v64; + bt->prop_seg = tseg1 / 2; + bt->phase_seg1 = tseg1 - bt->prop_seg; + bt->phase_seg2 = tseg2; + bt->sjw = 1; + bt->brp = best_brp; + /* real bit-rate */ + bt->bitrate = priv->clock.freq / (bt->brp * (tseg1 + tseg2 + 1)); + + return 0; +} +#else /* !CONFIG_CAN_CALC_BITTIMING */ +static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt) +{ + dev_err(dev->dev.parent, "bit-timing calculation not available\n"); + return -EINVAL; +} +#endif /* CONFIG_CAN_CALC_BITTIMING */ + +/* + * Checks the validity of the specified bit-timing parameters prop_seg, + * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate + * prescaler value brp. You can find more information in the header + * file linux/can/netlink.h. + */ +static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt) +{ + struct can_priv *priv = netdev_priv(dev); + const struct can_bittiming_const *btc = priv->bittiming_const; + int tseg1, alltseg; + u64 brp64; + + if (!priv->bittiming_const) + return -ENOTSUPP; + + tseg1 = bt->prop_seg + bt->phase_seg1; + if (!bt->sjw) + bt->sjw = 1; + if (bt->sjw > btc->sjw_max || + tseg1 < btc->tseg1_min || tseg1 > btc->tseg1_max || + bt->phase_seg2 < btc->tseg2_min || bt->phase_seg2 > btc->tseg2_max) + return -ERANGE; + + brp64 = (u64)priv->clock.freq * (u64)bt->tq; + if (btc->brp_inc > 1) + do_div(brp64, btc->brp_inc); + brp64 += 500000000UL - 1; + do_div(brp64, 1000000000UL); /* the practicable BRP */ + if (btc->brp_inc > 1) + brp64 *= btc->brp_inc; + bt->brp = (u32)brp64; + + if (bt->brp < btc->brp_min || bt->brp > btc->brp_max) + return -EINVAL; + + alltseg = bt->prop_seg + bt->phase_seg1 + bt->phase_seg2 + 1; + bt->bitrate = priv->clock.freq / (bt->brp * alltseg); + bt->sample_point = ((tseg1 + 1) * 1000) / alltseg; + + return 0; +} + +int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt) +{ + struct can_priv *priv = netdev_priv(dev); + int err; + + /* Check if the CAN device has bit-timing parameters */ + if (priv->bittiming_const) { + + /* Non-expert mode? Check if the bitrate has been pre-defined */ + if (!bt->tq) + /* Determine bit-timing parameters */ + err = can_calc_bittiming(dev, bt); + else + /* Check bit-timing params and calculate proper brp */ + err = can_fixup_bittiming(dev, bt); + if (err) + return err; + } + + return 0; +} + +/* + * Local echo of CAN messages + * + * CAN network devices *should* support a local echo functionality + * (see Documentation/networking/can.txt). To test the handling of CAN + * interfaces that do not support the local echo both driver types are + * implemented. In the case that the driver does not support the echo + * the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core + * to perform the echo as a fallback solution. + */ +static void can_flush_echo_skb(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + int i; + + for (i = 0; i < CAN_ECHO_SKB_MAX; i++) { + if (priv->echo_skb[i]) { + kfree_skb(priv->echo_skb[i]); + priv->echo_skb[i] = NULL; + stats->tx_dropped++; + stats->tx_aborted_errors++; + } + } +} + +/* + * Put the skb on the stack to be looped backed locally lateron + * + * The function is typically called in the start_xmit function + * of the device driver. The driver must protect access to + * priv->echo_skb, if necessary. + */ +void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx) +{ + struct can_priv *priv = netdev_priv(dev); + + /* check flag whether this packet has to be looped back */ + if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK) { + kfree_skb(skb); + return; + } + + if (!priv->echo_skb[idx]) { + struct sock *srcsk = skb->sk; + + if (atomic_read(&skb->users) != 1) { + struct sk_buff *old_skb = skb; + + skb = skb_clone(old_skb, GFP_ATOMIC); + kfree_skb(old_skb); + if (!skb) + return; + } else + skb_orphan(skb); + + skb->sk = srcsk; + + /* make settings for echo to reduce code in irq context */ + skb->protocol = htons(ETH_P_CAN); + skb->pkt_type = PACKET_BROADCAST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->dev = dev; + + /* save this skb for tx interrupt echo handling */ + priv->echo_skb[idx] = skb; + } else { + /* locking problem with netif_stop_queue() ?? */ + dev_err(dev->dev.parent, "%s: BUG! echo_skb is occupied!\n", + __func__); + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(can_put_echo_skb); + +/* + * Get the skb from the stack and loop it back locally + * + * The function is typically called when the TX done interrupt + * is handled in the device driver. The driver must protect + * access to priv->echo_skb, if necessary. + */ +void can_get_echo_skb(struct net_device *dev, int idx) +{ + struct can_priv *priv = netdev_priv(dev); + + if ((dev->flags & IFF_ECHO) && priv->echo_skb[idx]) { + netif_rx(priv->echo_skb[idx]); + priv->echo_skb[idx] = NULL; + } +} +EXPORT_SYMBOL_GPL(can_get_echo_skb); + +/* + * CAN device restart for bus-off recovery + */ +void can_restart(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct can_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct sk_buff *skb; + struct can_frame *cf; + int err; + + BUG_ON(netif_carrier_ok(dev)); + + /* + * No synchronization needed because the device is bus-off and + * no messages can come in or go out. + */ + can_flush_echo_skb(dev); + + /* send restart message upstream */ + skb = dev_alloc_skb(sizeof(struct can_frame)); + if (skb == NULL) { + err = -ENOMEM; + goto out; + } + skb->dev = dev; + skb->protocol = htons(ETH_P_CAN); + cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + memset(cf, 0, sizeof(struct can_frame)); + cf->can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED; + cf->can_dlc = CAN_ERR_DLC; + + netif_rx(skb); + + dev->last_rx = jiffies; + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + + dev_dbg(dev->dev.parent, "restarted\n"); + priv->can_stats.restarts++; + + /* Now restart the device */ + err = priv->do_set_mode(dev, CAN_MODE_START); + +out: + netif_carrier_on(dev); + if (err) + dev_err(dev->dev.parent, "Error %d during restart", err); +} + +int can_restart_now(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + /* + * A manual restart is only permitted if automatic restart is + * disabled and the device is in the bus-off state + */ + if (priv->restart_ms) + return -EINVAL; + if (priv->state != CAN_STATE_BUS_OFF) + return -EBUSY; + + /* Runs as soon as possible in the timer context */ + mod_timer(&priv->restart_timer, jiffies); + + return 0; +} + +/* + * CAN bus-off + * + * This functions should be called when the device goes bus-off to + * tell the netif layer that no more packets can be sent or received. + * If enabled, a timer is started to trigger bus-off recovery. + */ +void can_bus_off(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + dev_dbg(dev->dev.parent, "bus-off\n"); + + netif_carrier_off(dev); + priv->can_stats.bus_off++; + + if (priv->restart_ms) + mod_timer(&priv->restart_timer, + jiffies + (priv->restart_ms * HZ) / 1000); +} +EXPORT_SYMBOL_GPL(can_bus_off); + +static void can_setup(struct net_device *dev) +{ + dev->type = ARPHRD_CAN; + dev->mtu = sizeof(struct can_frame); + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 10; + + /* New-style flags. */ + dev->flags = IFF_NOARP; + dev->features = NETIF_F_NO_CSUM; +} + +/* + * Allocate and setup space for the CAN network device + */ +struct net_device *alloc_candev(int sizeof_priv) +{ + struct net_device *dev; + struct can_priv *priv; + + dev = alloc_netdev(sizeof_priv, "can%d", can_setup); + if (!dev) + return NULL; + + priv = netdev_priv(dev); + + priv->state = CAN_STATE_STOPPED; + + init_timer(&priv->restart_timer); + + return dev; +} +EXPORT_SYMBOL_GPL(alloc_candev); + +/* + * Free space of the CAN network device + */ +void free_candev(struct net_device *dev) +{ + free_netdev(dev); +} +EXPORT_SYMBOL_GPL(free_candev); + +/* + * Common open function when the device gets opened. + * + * This function should be called in the open function of the device + * driver. + */ +int open_candev(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + if (!priv->bittiming.tq && !priv->bittiming.bitrate) { + dev_err(dev->dev.parent, "bit-timing not yet defined\n"); + return -EINVAL; + } + + setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev); + + return 0; +} +EXPORT_SYMBOL(open_candev); + +/* + * Common close function for cleanup before the device gets closed. + * + * This function should be called in the close function of the device + * driver. + */ +void close_candev(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + if (del_timer_sync(&priv->restart_timer)) + dev_put(dev); + can_flush_echo_skb(dev); +} +EXPORT_SYMBOL_GPL(close_candev); + +/* + * CAN netlink interface + */ +static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { + [IFLA_CAN_STATE] = { .type = NLA_U32 }, + [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, + [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, + [IFLA_CAN_RESTART] = { .type = NLA_U32 }, + [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_BITTIMING_CONST] + = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, +}; + +static int can_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct can_priv *priv = netdev_priv(dev); + int err; + + /* We need synchronization with dev->stop() */ + ASSERT_RTNL(); + + if (data[IFLA_CAN_CTRLMODE]) { + struct can_ctrlmode *cm; + + /* Do not allow changing controller mode while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + cm = nla_data(data[IFLA_CAN_CTRLMODE]); + priv->ctrlmode &= ~cm->mask; + priv->ctrlmode |= cm->flags; + } + + if (data[IFLA_CAN_BITTIMING]) { + struct can_bittiming bt; + + /* Do not allow changing bittiming while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); + if ((!bt.bitrate && !bt.tq) || (bt.bitrate && bt.tq)) + return -EINVAL; + err = can_get_bittiming(dev, &bt); + if (err) + return err; + memcpy(&priv->bittiming, &bt, sizeof(bt)); + + if (priv->do_set_bittiming) { + /* Finally, set the bit-timing registers */ + err = priv->do_set_bittiming(dev); + if (err) + return err; + } + } + + if (data[IFLA_CAN_RESTART_MS]) { + /* Do not allow changing restart delay while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + } + + if (data[IFLA_CAN_RESTART]) { + /* Do not allow a restart while not running */ + if (!(dev->flags & IFF_UP)) + return -EINVAL; + err = can_restart_now(dev); + if (err) + return err; + } + + return 0; +} + +static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + struct can_ctrlmode cm = {.flags = priv->ctrlmode}; + enum can_state state = priv->state; + + if (priv->do_get_state) + priv->do_get_state(dev, &state); + NLA_PUT_U32(skb, IFLA_CAN_STATE, state); + NLA_PUT(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm); + NLA_PUT_U32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms); + NLA_PUT(skb, IFLA_CAN_BITTIMING, + sizeof(priv->bittiming), &priv->bittiming); + NLA_PUT(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock); + if (priv->bittiming_const) + NLA_PUT(skb, IFLA_CAN_BITTIMING_CONST, + sizeof(*priv->bittiming_const), priv->bittiming_const); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + NLA_PUT(skb, IFLA_INFO_XSTATS, + sizeof(priv->can_stats), &priv->can_stats); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops can_link_ops __read_mostly = { + .kind = "can", + .maxtype = IFLA_CAN_MAX, + .policy = can_policy, + .setup = can_setup, + .changelink = can_changelink, + .fill_info = can_fill_info, + .fill_xstats = can_fill_xstats, +}; + +/* + * Register the CAN network device + */ +int register_candev(struct net_device *dev) +{ + dev->rtnl_link_ops = &can_link_ops; + return register_netdev(dev); +} +EXPORT_SYMBOL_GPL(register_candev); + +/* + * Unregister the CAN network device + */ +void unregister_candev(struct net_device *dev) +{ + unregister_netdev(dev); +} +EXPORT_SYMBOL_GPL(unregister_candev); + +static __init int can_dev_init(void) +{ + int err; + + err = rtnl_link_register(&can_link_ops); + if (!err) + printk(KERN_INFO MOD_DESC "\n"); + + return err; +} +module_init(can_dev_init); + +static __exit void can_dev_exit(void) +{ + rtnl_link_unregister(&can_link_ops); +} +module_exit(can_dev_exit); + +MODULE_ALIAS_RTNL_LINK("can"); diff --git a/include/linux/can/Kbuild b/include/linux/can/Kbuild index eff898aac02..8cb05aae661 100644 --- a/include/linux/can/Kbuild +++ b/include/linux/can/Kbuild @@ -1,3 +1,4 @@ header-y += raw.h header-y += bcm.h header-y += error.h +header-y += netlink.h diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h new file mode 100644 index 00000000000..4a37a56f6cd --- /dev/null +++ b/include/linux/can/dev.h @@ -0,0 +1,70 @@ +/* + * linux/can/dev.h + * + * Definitions for the CAN network device driver interface + * + * Copyright (C) 2006 Andrey Volkov + * Varma Electronics Oy + * + * Copyright (C) 2008 Wolfgang Grandegger + * + * Send feedback to + */ + +#ifndef CAN_DEV_H +#define CAN_DEV_H + +#include +#include + +/* + * CAN mode + */ +enum can_mode { + CAN_MODE_STOP = 0, + CAN_MODE_START, + CAN_MODE_SLEEP +}; + +/* + * CAN common private data + */ +#define CAN_ECHO_SKB_MAX 4 + +struct can_priv { + struct can_device_stats can_stats; + + struct can_bittiming bittiming; + struct can_bittiming_const *bittiming_const; + struct can_clock clock; + + enum can_state state; + u32 ctrlmode; + + int restart_ms; + struct timer_list restart_timer; + + struct sk_buff *echo_skb[CAN_ECHO_SKB_MAX]; + + int (*do_set_bittiming)(struct net_device *dev); + int (*do_set_mode)(struct net_device *dev, enum can_mode mode); + int (*do_get_state)(const struct net_device *dev, + enum can_state *state); +}; + +struct net_device *alloc_candev(int sizeof_priv); +void free_candev(struct net_device *dev); + +int open_candev(struct net_device *dev); +void close_candev(struct net_device *dev); + +int register_candev(struct net_device *dev); +void unregister_candev(struct net_device *dev); + +int can_restart_now(struct net_device *dev); +void can_bus_off(struct net_device *dev); + +void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx); +void can_get_echo_skb(struct net_device *dev, int idx); + +#endif /* CAN_DEV_H */ diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h new file mode 100644 index 00000000000..9ecbb7871c0 --- /dev/null +++ b/include/linux/can/netlink.h @@ -0,0 +1,113 @@ +/* + * linux/can/netlink.h + * + * Definitions for the CAN netlink interface + * + * Copyright (c) 2009 Wolfgang Grandegger + * + * Send feedback to + * + */ + +#ifndef CAN_NETLINK_H +#define CAN_NETLINK_H + +#include + +/* + * CAN bit-timing parameters + * + * For futher information, please read chapter "8 BIT TIMING + * REQUIREMENTS" of the "Bosch CAN Specification version 2.0" + * at http://www.semiconductors.bosch.de/pdf/can2spec.pdf. + */ +struct can_bittiming { + __u32 bitrate; /* Bit-rate in bits/second */ + __u32 sample_point; /* Sample point in one-tenth of a percent */ + __u32 tq; /* Time quanta (TQ) in nanoseconds */ + __u32 prop_seg; /* Propagation segment in TQs */ + __u32 phase_seg1; /* Phase buffer segment 1 in TQs */ + __u32 phase_seg2; /* Phase buffer segment 2 in TQs */ + __u32 sjw; /* Synchronisation jump width in TQs */ + __u32 brp; /* Bit-rate prescaler */ +}; + +/* + * CAN harware-dependent bit-timing constant + * + * Used for calculating and checking bit-timing parameters + */ +struct can_bittiming_const { + char name[16]; /* Name of the CAN controller hardware */ + __u32 tseg1_min; /* Time segement 1 = prop_seg + phase_seg1 */ + __u32 tseg1_max; + __u32 tseg2_min; /* Time segement 2 = phase_seg2 */ + __u32 tseg2_max; + __u32 sjw_max; /* Synchronisation jump width */ + __u32 brp_min; /* Bit-rate prescaler */ + __u32 brp_max; + __u32 brp_inc; +}; + +/* + * CAN clock parameters + */ +struct can_clock { + __u32 freq; /* CAN system clock frequency in Hz */ +}; + +/* + * CAN operational and error states + */ +enum can_state { + CAN_STATE_ERROR_ACTIVE = 0, /* RX/TX error count < 96 */ + CAN_STATE_ERROR_WARNING, /* RX/TX error count < 128 */ + CAN_STATE_ERROR_PASSIVE, /* RX/TX error count < 256 */ + CAN_STATE_BUS_OFF, /* RX/TX error count >= 256 */ + CAN_STATE_STOPPED, /* Device is stopped */ + CAN_STATE_SLEEPING, /* Device is sleeping */ + CAN_STATE_MAX +}; + +/* + * CAN controller mode + */ +struct can_ctrlmode { + __u32 mask; + __u32 flags; +}; + +#define CAN_CTRLMODE_LOOPBACK 0x1 /* Loopback mode */ +#define CAN_CTRLMODE_LISTENONLY 0x2 /* Listen-only mode */ +#define CAN_CTRLMODE_3_SAMPLES 0x4 /* Triple sampling mode */ + +/* + * CAN device statistics + */ +struct can_device_stats { + __u32 bus_error; /* Bus errors */ + __u32 error_warning; /* Changes to error warning state */ + __u32 error_passive; /* Changes to error passive state */ + __u32 bus_off; /* Changes to bus off state */ + __u32 arbitration_lost; /* Arbitration lost errors */ + __u32 restarts; /* CAN controller re-starts */ +}; + +/* + * CAN netlink interface + */ +enum { + IFLA_CAN_UNSPEC, + IFLA_CAN_BITTIMING, + IFLA_CAN_BITTIMING_CONST, + IFLA_CAN_CLOCK, + IFLA_CAN_STATE, + IFLA_CAN_CTRLMODE, + IFLA_CAN_RESTART_MS, + IFLA_CAN_RESTART, + __IFLA_CAN_MAX +}; + +#define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1) + +#endif /* CAN_NETLINK_H */ -- cgit v1.2.3-70-g09d2 From f534e52f091a7b9f51fb6726710bdf731b663e94 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Fri, 15 May 2009 23:39:31 +0000 Subject: can: SJA1000 generic platform bus driver This driver adds support for the SJA1000 chips connected to the "platform bus", which can be found on various embedded systems. Signed-off-by: Sascha Hauer Signed-off-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 10 ++ drivers/net/can/sja1000/Makefile | 1 + drivers/net/can/sja1000/sja1000_platform.c | 164 +++++++++++++++++++++++++++++ include/linux/can/platform/sja1000.h | 32 ++++++ 4 files changed, 207 insertions(+) create mode 100644 drivers/net/can/sja1000/sja1000_platform.c create mode 100644 include/linux/can/platform/sja1000.h (limited to 'include') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index c7e5ce339ea..8fe79741d3c 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -41,6 +41,16 @@ config CAN_SJA1000 ---help--- Driver for the SJA1000 CAN controllers from Philips or NXP +config CAN_SJA1000_PLATFORM + depends on CAN_SJA1000 + tristate "Generic Platform Bus based SJA1000 driver" + ---help--- + This driver adds support for the SJA1000 chips connected to + the "platform bus" (Linux abstraction for directly to the + processor attached devices). Which can be found on various + boards from Phytec (http://www.phytec.de) like the PCM027, + PCM038. + config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" depends on CAN diff --git a/drivers/net/can/sja1000/Makefile b/drivers/net/can/sja1000/Makefile index 893ab2cfe24..5115cc90ab5 100644 --- a/drivers/net/can/sja1000/Makefile +++ b/drivers/net/can/sja1000/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_CAN_SJA1000) += sja1000.o +obj-$(CONFIG_CAN_SJA1000_PLATFORM) += sja1000_platform.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c new file mode 100644 index 00000000000..8017229d6fd --- /dev/null +++ b/drivers/net/can/sja1000/sja1000_platform.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2005 Sascha Hauer, Pengutronix + * Copyright (C) 2007 Wolfgang Grandegger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sja1000.h" + +#define DRV_NAME "sja1000_platform" + +MODULE_AUTHOR("Sascha Hauer "); +MODULE_DESCRIPTION("Socket-CAN driver for SJA1000 on the platform bus"); +MODULE_LICENSE("GPL v2"); + +static u8 sp_read_reg(const struct net_device *dev, int reg) +{ + return ioread8((void __iomem *)(dev->base_addr + reg)); +} + +static void sp_write_reg(const struct net_device *dev, int reg, u8 val) +{ + iowrite8(val, (void __iomem *)(dev->base_addr + reg)); +} + +static int sp_probe(struct platform_device *pdev) +{ + int err; + void __iomem *addr; + struct net_device *dev; + struct sja1000_priv *priv; + struct resource *res_mem, *res_irq; + struct sja1000_platform_data *pdata; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "No platform data provided!\n"); + err = -ENODEV; + goto exit; + } + + res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res_mem || !res_irq) { + err = -ENODEV; + goto exit; + } + + if (!request_mem_region(res_mem->start, resource_size(res_mem), + DRV_NAME)) { + err = -EBUSY; + goto exit; + } + + addr = ioremap_nocache(res_mem->start, resource_size(res_mem)); + if (!addr) { + err = -ENOMEM; + goto exit_release; + } + + dev = alloc_sja1000dev(0); + if (!dev) { + err = -ENOMEM; + goto exit_iounmap; + } + priv = netdev_priv(dev); + + dev->base_addr = (unsigned long)addr; + dev->irq = res_irq->start; + priv->irq_flags = res_irq->flags & IRQF_TRIGGER_MASK; + priv->read_reg = sp_read_reg; + priv->write_reg = sp_write_reg; + priv->can.clock.freq = pdata->clock; + priv->ocr = pdata->ocr; + priv->cdr = pdata->cdr; + + dev_set_drvdata(&pdev->dev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + err = register_sja1000dev(dev); + if (err) { + dev_err(&pdev->dev, "registering %s failed (err=%d)\n", + DRV_NAME, err); + goto exit_free; + } + + dev_info(&pdev->dev, "%s device registered (base_addr=%#lx, irq=%d)\n", + DRV_NAME, dev->base_addr, dev->irq); + return 0; + + exit_free: + free_sja1000dev(dev); + exit_iounmap: + iounmap(addr); + exit_release: + release_mem_region(res_mem->start, resource_size(res_mem)); + exit: + return err; +} + +static int sp_remove(struct platform_device *pdev) +{ + struct net_device *dev = dev_get_drvdata(&pdev->dev); + struct resource *res; + + unregister_sja1000dev(dev); + dev_set_drvdata(&pdev->dev, NULL); + + if (dev->base_addr) + iounmap((void __iomem *)dev->base_addr); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(res->start, resource_size(res)); + + free_sja1000dev(dev); + + return 0; +} + +static struct platform_driver sp_driver = { + .probe = sp_probe, + .remove = sp_remove, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, +}; + +static int __init sp_init(void) +{ + return platform_driver_register(&sp_driver); +} + +static void __exit sp_exit(void) +{ + platform_driver_unregister(&sp_driver); +} + +module_init(sp_init); +module_exit(sp_exit); diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h new file mode 100644 index 00000000000..37966e630ff --- /dev/null +++ b/include/linux/can/platform/sja1000.h @@ -0,0 +1,32 @@ +#ifndef _CAN_PLATFORM_SJA1000_H_ +#define _CAN_PLATFORM_SJA1000_H_ + +/* clock divider register */ +#define CDR_CLKOUT_MASK 0x07 +#define CDR_CLK_OFF 0x08 /* Clock off (CLKOUT pin) */ +#define CDR_RXINPEN 0x20 /* TX1 output is RX irq output */ +#define CDR_CBP 0x40 /* CAN input comparator bypass */ +#define CDR_PELICAN 0x80 /* PeliCAN mode */ + +/* output control register */ +#define OCR_MODE_BIPHASE 0x00 +#define OCR_MODE_TEST 0x01 +#define OCR_MODE_NORMAL 0x02 +#define OCR_MODE_CLOCK 0x03 +#define OCR_TX0_INVERT 0x04 +#define OCR_TX0_PULLDOWN 0x08 +#define OCR_TX0_PULLUP 0x10 +#define OCR_TX0_PUSHPULL 0x18 +#define OCR_TX1_INVERT 0x20 +#define OCR_TX1_PULLDOWN 0x40 +#define OCR_TX1_PULLUP 0x80 +#define OCR_TX1_PUSHPULL 0xc0 + +struct sja1000_platform_data { + u32 clock; /* CAN bus oscillator frequency in Hz */ + + u8 ocr; /* output control register */ + u8 cdr; /* clock divider register */ +}; + +#endif /* !_CAN_PLATFORM_SJA1000_H_ */ -- cgit v1.2.3-70-g09d2 From 69e3c75f4d541a6eb151b3ef91f34033cb3ad6e1 Mon Sep 17 00:00:00 2001 From: Johann Baudy Date: Mon, 18 May 2009 22:11:22 -0700 Subject: net: TX_RING and packet mmap New packet socket feature that makes packet socket more efficient for transmission. - It reduces number of system call through a PACKET_TX_RING mechanism, based on PACKET_RX_RING (Circular buffer allocated in kernel space which is mmapped from user space). - It minimizes CPU copy using fragmented SKB (almost zero copy). Signed-off-by: Johann Baudy Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 140 +++++++- include/linux/if_packet.h | 20 +- include/linux/skbuff.h | 3 + net/packet/af_packet.c | 588 +++++++++++++++++++++++++------ 4 files changed, 616 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 07c53d59603..a22fd85e379 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -4,16 +4,18 @@ This file documents the CONFIG_PACKET_MMAP option available with the PACKET socket interface on 2.4 and 2.6 kernels. This type of sockets is used for -capture network traffic with utilities like tcpdump or any other that uses -the libpcap library. - -You can find the latest version of this document at +capture network traffic with utilities like tcpdump or any other that needs +raw access to network interface. +You can find the latest version of this document at: http://pusa.uv.es/~ulisses/packet_mmap/ -Please send me your comments to +Howto can be found at: + http://wiki.gnu-log.net (packet_mmap) +Please send your comments to Ulisses Alonso Camaró + Johann Baudy ------------------------------------------------------------------------------- + Why use PACKET_MMAP @@ -25,19 +27,24 @@ to capture each packet, it requires two if you want to get packet's timestamp (like libpcap always does). In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size -configurable circular buffer mapped in user space. This way reading packets just -needs to wait for them, most of the time there is no need to issue a single -system call. By using a shared buffer between the kernel and the user -also has the benefit of minimizing packet copies. - -It's fine to use PACKET_MMAP to improve the performance of the capture process, -but it isn't everything. At least, if you are capturing at high speeds (this -is relative to the cpu speed), you should check if the device driver of your -network interface card supports some sort of interrupt load mitigation or -(even better) if it supports NAPI, also make sure it is enabled. +configurable circular buffer mapped in user space that can be used to either +send or receive packets. This way reading packets just needs to wait for them, +most of the time there is no need to issue a single system call. Concerning +transmission, multiple packets can be sent through one system call to get the +highest bandwidth. +By using a shared buffer between the kernel and the user also has the benefit +of minimizing packet copies. + +It's fine to use PACKET_MMAP to improve the performance of the capture and +transmission process, but it isn't everything. At least, if you are capturing +at high speeds (this is relative to the cpu speed), you should check if the +device driver of your network interface card supports some sort of interrupt +load mitigation or (even better) if it supports NAPI, also make sure it is +enabled. For transmission, check the MTU (Maximum Transmission Unit) used and +supported by devices of your network. -------------------------------------------------------------------------------- -+ How to use CONFIG_PACKET_MMAP ++ How to use CONFIG_PACKET_MMAP to improve capture process -------------------------------------------------------------------------------- From the user standpoint, you should use the higher level libpcap library, which @@ -57,7 +64,7 @@ the low level details or want to improve libpcap by including PACKET_MMAP support. -------------------------------------------------------------------------------- -+ How to use CONFIG_PACKET_MMAP directly ++ How to use CONFIG_PACKET_MMAP directly to improve capture process -------------------------------------------------------------------------------- From the system calls stand point, the use of PACKET_MMAP involves @@ -66,6 +73,7 @@ the following process: [setup] socket() -------> creation of the capture socket setsockopt() ---> allocation of the circular buffer (ring) + option: PACKET_RX_RING mmap() ---------> mapping of the allocated buffer to the user process @@ -96,6 +104,65 @@ Next I will describe PACKET_MMAP settings and it's constraints, also the mapping of the circular buffer in the user process and the use of this buffer. +-------------------------------------------------------------------------------- ++ How to use CONFIG_PACKET_MMAP directly to improve transmission process +-------------------------------------------------------------------------------- +Transmission process is similar to capture as shown below. + +[setup] socket() -------> creation of the transmission socket + setsockopt() ---> allocation of the circular buffer (ring) + option: PACKET_TX_RING + bind() ---------> bind transmission socket with a network interface + mmap() ---------> mapping of the allocated buffer to the + user process + +[transmission] poll() ---------> wait for free packets (optional) + send() ---------> send all packets that are set as ready in + the ring + The flag MSG_DONTWAIT can be used to return + before end of transfer. + +[shutdown] close() --------> destruction of the transmission socket and + deallocation of all associated resources. + +Binding the socket to your network interface is mandatory (with zero copy) to +know the header size of frames used in the circular buffer. + +As capture, each frame contains two parts: + + -------------------- +| struct tpacket_hdr | Header. It contains the status of +| | of this frame +|--------------------| +| data buffer | +. . Data that will be sent over the network interface. +. . + -------------------- + + bind() associates the socket to your network interface thanks to + sll_ifindex parameter of struct sockaddr_ll. + + Initialization example: + + struct sockaddr_ll my_addr; + struct ifreq s_ifr; + ... + + strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name)); + + /* get interface index of eth0 */ + ioctl(this->socket, SIOCGIFINDEX, &s_ifr); + + /* fill sockaddr_ll struct to prepare binding */ + my_addr.sll_family = AF_PACKET; + my_addr.sll_protocol = ETH_P_ALL; + my_addr.sll_ifindex = s_ifr.ifr_ifindex; + + /* bind socket to eth0 */ + bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll)); + + A complete tutorial is available at: http://wiki.gnu-log.net/ + -------------------------------------------------------------------------------- + PACKET_MMAP settings -------------------------------------------------------------------------------- @@ -103,7 +170,10 @@ the use of this buffer. To setup PACKET_MMAP from user level code is done with a call like + - Capture process setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req)) + - Transmission process + setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req)) The most significant argument in the previous call is the req parameter, this parameter must to have the following structure: @@ -117,11 +187,11 @@ this parameter must to have the following structure: }; This structure is defined in /usr/include/linux/if_packet.h and establishes a -circular buffer (ring) of unswappable memory mapped in the capture process. +circular buffer (ring) of unswappable memory. Being mapped in the capture process allows reading the captured frames and related meta-information like timestamps without requiring a system call. -Captured frames are grouped in blocks. Each block is a physically contiguous +Frames are grouped in blocks. Each block is a physically contiguous region of memory and holds tp_block_size/tp_frame_size frames. The total number of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because @@ -336,6 +406,7 @@ struct tpacket_hdr). If this field is 0 means that the frame is ready to be used for the kernel, If not, there is a frame the user can read and the following flags apply: ++++ Capture process: from include/linux/if_packet.h #define TP_STATUS_COPY 2 @@ -391,6 +462,37 @@ packets are in the ring: It doesn't incur in a race condition to first check the status value and then poll for frames. + +++ Transmission process +Those defines are also used for transmission: + + #define TP_STATUS_AVAILABLE 0 // Frame is available + #define TP_STATUS_SEND_REQUEST 1 // Frame will be sent on next send() + #define TP_STATUS_SENDING 2 // Frame is currently in transmission + #define TP_STATUS_WRONG_FORMAT 4 // Frame format is not correct + +First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a +packet, the user fills a data buffer of an available frame, sets tp_len to +current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST. +This can be done on multiple frames. Once the user is ready to transmit, it +calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are +forwarded to the network device. The kernel updates each status of sent +frames with TP_STATUS_SENDING until the end of transfer. +At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE. + + header->tp_len = in_i_size; + header->tp_status = TP_STATUS_SEND_REQUEST; + retval = send(this->socket, NULL, 0, 0); + +The user can also use poll() to check if a buffer is available: +(status == TP_STATUS_SENDING) + + struct pollfd pfd; + pfd.fd = fd; + pfd.revents = 0; + pfd.events = POLLOUT; + retval = poll(&pfd, 1, timeout); + -------------------------------------------------------------------------------- + THANKS -------------------------------------------------------------------------------- diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 18db0668065..5b2badeb949 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -46,6 +46,8 @@ struct sockaddr_ll #define PACKET_VERSION 10 #define PACKET_HDRLEN 11 #define PACKET_RESERVE 12 +#define PACKET_TX_RING 13 +#define PACKET_LOSS 14 struct tpacket_stats { @@ -63,14 +65,22 @@ struct tpacket_auxdata __u16 tp_vlan_tci; }; +/* Rx ring - header status */ +#define TP_STATUS_KERNEL 0x0 +#define TP_STATUS_USER 0x1 +#define TP_STATUS_COPY 0x2 +#define TP_STATUS_LOSING 0x4 +#define TP_STATUS_CSUMNOTREADY 0x8 + +/* Tx ring - header status */ +#define TP_STATUS_AVAILABLE 0x0 +#define TP_STATUS_SEND_REQUEST 0x1 +#define TP_STATUS_SENDING 0x2 +#define TP_STATUS_WRONG_FORMAT 0x4 + struct tpacket_hdr { unsigned long tp_status; -#define TP_STATUS_KERNEL 0 -#define TP_STATUS_USER 1 -#define TP_STATUS_COPY 2 -#define TP_STATUS_LOSING 4 -#define TP_STATUS_CSUMNOTREADY 8 unsigned int tp_len; unsigned int tp_snaplen; unsigned short tp_mac; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1b5c3d298f4..aff494ba6a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -203,6 +203,9 @@ struct skb_shared_info { #ifdef CONFIG_HAS_DMA dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; #endif + /* Intermediate layers must ensure that destructor_arg + * remains valid until skb destructor */ + void * destructor_arg; }; /* We divide dataref into two halves. The higher 16 bits hold references diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f546e81acc4..766e6b41f7c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -39,6 +39,7 @@ * will simply extend the hardware address * byte arrays at the end of sockaddr_ll * and packet_mreq. + * Johann Baudy : Added TX RING. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -157,7 +158,25 @@ struct packet_mreq_max }; #ifdef CONFIG_PACKET_MMAP -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing); +static int packet_set_ring(struct sock *sk, struct tpacket_req *req, + int closing, int tx_ring); + +struct packet_ring_buffer { + char * *pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + atomic_t pending; +}; + +struct packet_sock; +static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); #endif static void packet_flush_mclist(struct sock *sk); @@ -167,11 +186,8 @@ struct packet_sock { struct sock sk; struct tpacket_stats stats; #ifdef CONFIG_PACKET_MMAP - char * *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; + struct packet_ring_buffer rx_ring; + struct packet_ring_buffer tx_ring; int copy_thresh; #endif struct packet_type prot_hook; @@ -185,12 +201,10 @@ struct packet_sock { struct packet_mclist *mclist; #ifdef CONFIG_PACKET_MMAP atomic_t mapped; - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; + unsigned int tp_loss:1; #endif }; @@ -206,36 +220,33 @@ struct packet_skb_cb { #ifdef CONFIG_PACKET_MMAP -static void *packet_lookup_frame(struct packet_sock *po, unsigned int position, - int status) +static void __packet_set_status(struct packet_sock *po, void *frame, int status) { - unsigned int pg_vec_pos, frame_offset; union { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; void *raw; } h; - pg_vec_pos = position / po->frames_per_block; - frame_offset = position % po->frames_per_block; - - h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); + h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - if (status != (h.h1->tp_status ? TP_STATUS_USER : - TP_STATUS_KERNEL)) - return NULL; + h.h1->tp_status = status; + flush_dcache_page(virt_to_page(&h.h1->tp_status)); break; case TPACKET_V2: - if (status != (h.h2->tp_status ? TP_STATUS_USER : - TP_STATUS_KERNEL)) - return NULL; + h.h2->tp_status = status; + flush_dcache_page(virt_to_page(&h.h2->tp_status)); break; + default: + printk(KERN_ERR "TPACKET version not supported\n"); + BUG(); } - return h.raw; + + smp_wmb(); } -static void __packet_set_status(struct packet_sock *po, void *frame, int status) +static int __packet_get_status(struct packet_sock *po, void *frame) { union { struct tpacket_hdr *h1; @@ -243,16 +254,66 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) void *raw; } h; + smp_rmb(); + h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - h.h1->tp_status = status; - break; + flush_dcache_page(virt_to_page(&h.h1->tp_status)); + return h.h1->tp_status; case TPACKET_V2: - h.h2->tp_status = status; - break; + flush_dcache_page(virt_to_page(&h.h2->tp_status)); + return h.h2->tp_status; + default: + printk(KERN_ERR "TPACKET version not supported\n"); + BUG(); + return 0; } } + +static void *packet_lookup_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + unsigned int position, + int status) +{ + unsigned int pg_vec_pos, frame_offset; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; + + pg_vec_pos = position / rb->frames_per_block; + frame_offset = position % rb->frames_per_block; + + h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size); + + if (status != __packet_get_status(po, h.raw)) + return NULL; + + return h.raw; +} + +static inline void *packet_current_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + return packet_lookup_frame(po, rb, rb->head, status); +} + +static inline void *packet_previous_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; + return packet_lookup_frame(po, rb, previous, status); +} + +static inline void packet_increment_head(struct packet_ring_buffer *buff) +{ + buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; +} + #endif static inline struct packet_sock *pkt_sk(struct sock *sk) @@ -648,7 +709,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe macoff = netoff - maclen; } - if (macoff + snaplen > po->frame_size) { + if (macoff + snaplen > po->rx_ring.frame_size) { if (po->copy_thresh && atomic_read(&sk->sk_rmem_alloc) + skb->truesize < (unsigned)sk->sk_rcvbuf) { @@ -661,16 +722,16 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe if (copy_skb) skb_set_owner_r(copy_skb, sk); } - snaplen = po->frame_size - macoff; + snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) snaplen = 0; } spin_lock(&sk->sk_receive_queue.lock); - h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL); + h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); if (!h.raw) goto ring_is_full; - po->head = po->head != po->frame_max ? po->head+1 : 0; + packet_increment_head(&po->rx_ring); po->stats.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; @@ -727,7 +788,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe __packet_set_status(po, h.raw, status); smp_mb(); - { struct page *p_start, *p_end; u8 *h_end = h.raw + macoff + snaplen - 1; @@ -760,10 +820,249 @@ ring_is_full: goto drop_n_restore; } -#endif +static void tpacket_destruct_skb(struct sk_buff *skb) +{ + struct packet_sock *po = pkt_sk(skb->sk); + void * ph; + BUG_ON(skb == NULL); -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, + if (likely(po->tx_ring.pg_vec)) { + ph = skb_shinfo(skb)->destructor_arg; + BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); + BUG_ON(atomic_read(&po->tx_ring.pending) == 0); + atomic_dec(&po->tx_ring.pending); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE); + } + + sock_wfree(skb); +} + +static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff * skb, + void * frame, struct net_device *dev, int size_max, + __be16 proto, unsigned char * addr) +{ + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } ph; + int to_write, offset, len, tp_len, nr_frags, len_max; + struct socket *sock = po->sk.sk_socket; + struct page *page; + void *data; + int err; + + ph.raw = frame; + + skb->protocol = proto; + skb->dev = dev; + skb->priority = po->sk.sk_priority; + skb_shinfo(skb)->destructor_arg = ph.raw; + + switch (po->tp_version) { + case TPACKET_V2: + tp_len = ph.h2->tp_len; + break; + default: + tp_len = ph.h1->tp_len; + break; + } + if (unlikely(tp_len > size_max)) { + printk(KERN_ERR "packet size is too long (%d > %d)\n", + tp_len, size_max); + return -EMSGSIZE; + } + + skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reset_network_header(skb); + + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + to_write = tp_len; + + if (sock->type == SOCK_DGRAM) { + err = dev_hard_header(skb, dev, ntohs(proto), addr, + NULL, tp_len); + if (unlikely(err < 0)) + return -EINVAL; + } else if (dev->hard_header_len ) { + /* net device doesn't like empty head */ + if (unlikely(tp_len <= dev->hard_header_len)) { + printk(KERN_ERR "packet size is too short " + "(%d < %d)\n", tp_len, + dev->hard_header_len); + return -EINVAL; + } + + skb_push(skb, dev->hard_header_len); + err = skb_store_bits(skb, 0, data, + dev->hard_header_len); + if (unlikely(err)) + return err; + + data += dev->hard_header_len; + to_write -= dev->hard_header_len; + } + + err = -EFAULT; + page = virt_to_page(data); + offset = offset_in_page(data); + len_max = PAGE_SIZE - offset; + len = ((to_write > len_max) ? len_max : to_write); + + skb->data_len = to_write; + skb->len += to_write; + skb->truesize += to_write; + atomic_add(to_write, &po->sk.sk_wmem_alloc); + + while (likely(to_write)) { + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { + printk(KERN_ERR "Packet exceed the number " + "of skb frags(%lu)\n", + MAX_SKB_FRAGS); + return -EFAULT; + } + + flush_dcache_page(page); + get_page(page); + skb_fill_page_desc(skb, + nr_frags, + page++, offset, len); + to_write -= len; + offset = 0; + len_max = PAGE_SIZE; + len = ((to_write > len_max) ? len_max : to_write); + } + + return tp_len; +} + +static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) +{ + struct socket *sock; + struct sk_buff *skb; + struct net_device *dev; + __be16 proto; + int ifindex, err, reserve = 0; + void * ph; + struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name; + int tp_len, size_max; + unsigned char *addr; + int len_sum = 0; + int status = 0; + + sock = po->sk.sk_socket; + + mutex_lock(&po->pg_vec_lock); + + err = -EBUSY; + if (saddr == NULL) { + ifindex = po->ifindex; + proto = po->num; + addr = NULL; + } else { + err = -EINVAL; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) + goto out; + if (msg->msg_namelen < (saddr->sll_halen + + offsetof(struct sockaddr_ll, + sll_addr))) + goto out; + ifindex = saddr->sll_ifindex; + proto = saddr->sll_protocol; + addr = saddr->sll_addr; + } + + dev = dev_get_by_index(sock_net(&po->sk), ifindex); + err = -ENXIO; + if (unlikely(dev == NULL)) + goto out; + + reserve = dev->hard_header_len; + + err = -ENETDOWN; + if (unlikely(!(dev->flags & IFF_UP))) + goto out_put; + + size_max = po->tx_ring.frame_size + - sizeof(struct skb_shared_info) + - po->tp_hdrlen + - LL_ALLOCATED_SPACE(dev) + - sizeof(struct sockaddr_ll); + + if (size_max > dev->mtu + reserve) + size_max = dev->mtu + reserve; + + do { + ph = packet_current_frame(po, &po->tx_ring, + TP_STATUS_SEND_REQUEST); + + if (unlikely(ph == NULL)) { + schedule(); + continue; + } + + status = TP_STATUS_SEND_REQUEST; + skb = sock_alloc_send_skb(&po->sk, + LL_ALLOCATED_SPACE(dev) + + sizeof(struct sockaddr_ll), + 0, &err); + + if (unlikely(skb == NULL)) + goto out_status; + + tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, + addr); + + if (unlikely(tp_len < 0)) { + if (po->tp_loss) { + __packet_set_status(po, ph, + TP_STATUS_AVAILABLE); + packet_increment_head(&po->tx_ring); + kfree_skb(skb); + continue; + } else { + status = TP_STATUS_WRONG_FORMAT; + err = tp_len; + goto out_status; + } + } + + skb->destructor = tpacket_destruct_skb; + __packet_set_status(po, ph, TP_STATUS_SENDING); + atomic_inc(&po->tx_ring.pending); + + status = TP_STATUS_SEND_REQUEST; + err = dev_queue_xmit(skb); + if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0)) + goto out_xmit; + packet_increment_head(&po->tx_ring); + len_sum += tp_len; + } + while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT)) + && (atomic_read(&po->tx_ring.pending)))) + ); + + err = len_sum; + goto out_put; + +out_xmit: + skb->destructor = sock_wfree; + atomic_dec(&po->tx_ring.pending); +out_status: + __packet_set_status(po, ph, status); + kfree_skb(skb); +out_put: + dev_put(dev); +out: + mutex_unlock(&po->pg_vec_lock); + return err; +} +#endif + +static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -854,6 +1153,19 @@ out: return err; } +static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ +#ifdef CONFIG_PACKET_MMAP + struct sock *sk = sock->sk; + struct packet_sock *po = pkt_sk(sk); + if (po->tx_ring.pg_vec) + return tpacket_snd(po, msg); + else +#endif + return packet_snd(sock, msg, len); +} + /* * Close a PACKET socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. @@ -864,6 +1176,9 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; +#ifdef CONFIG_PACKET_MMAP + struct tpacket_req req; +#endif if (!sk) return 0; @@ -893,11 +1208,13 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); #ifdef CONFIG_PACKET_MMAP - if (po->pg_vec) { - struct tpacket_req req; - memset(&req, 0, sizeof(req)); - packet_set_ring(sk, &req, 1); - } + memset(&req, 0, sizeof(req)); + + if (po->rx_ring.pg_vec) + packet_set_ring(sk, &req, 1, 0); + + if (po->tx_ring.pg_vec) + packet_set_ring(sk, &req, 1, 1); #endif /* @@ -1391,7 +1708,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (level != SOL_PACKET) return -ENOPROTOOPT; - switch(optname) { + switch (optname) { case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { @@ -1415,6 +1732,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv #ifdef CONFIG_PACKET_MMAP case PACKET_RX_RING: + case PACKET_TX_RING: { struct tpacket_req req; @@ -1422,7 +1740,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EINVAL; if (copy_from_user(&req,optval,sizeof(req))) return -EFAULT; - return packet_set_ring(sk, &req, 0); + return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); } case PACKET_COPY_THRESH: { @@ -1442,7 +1760,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->pg_vec) + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; @@ -1461,13 +1779,26 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->pg_vec) + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; po->tp_reserve = val; return 0; } + case PACKET_LOSS: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_loss = !!val; + return 0; + } #endif case PACKET_AUXDATA: { @@ -1517,7 +1848,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (len < 0) return -EINVAL; - switch(optname) { + switch (optname) { case PACKET_STATISTICS: if (len > sizeof(struct tpacket_stats)) len = sizeof(struct tpacket_stats); @@ -1573,6 +1904,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_reserve; data = &val; break; + case PACKET_LOSS: + if (len > sizeof(unsigned int)) + len = sizeof(unsigned int); + val = po->tp_loss; + data = &val; + break; #endif default: return -ENOPROTOOPT; @@ -1643,7 +1980,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, { struct sock *sk = sock->sk; - switch(cmd) { + switch (cmd) { case SIOCOUTQ: { int amount = atomic_read(&sk->sk_wmem_alloc); @@ -1705,13 +2042,17 @@ static unsigned int packet_poll(struct file * file, struct socket *sock, unsigned int mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); - if (po->pg_vec) { - unsigned last = po->head ? po->head-1 : po->frame_max; - - if (packet_lookup_frame(po, last, TP_STATUS_USER)) + if (po->rx_ring.pg_vec) { + if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); + spin_lock_bh(&sk->sk_write_queue.lock); + if (po->tx_ring.pg_vec) { + if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) + mask |= POLLOUT | POLLWRNORM; + } + spin_unlock_bh(&sk->sk_write_queue.lock); return mask; } @@ -1788,21 +2129,33 @@ out_free_pgvec: goto out; } -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) +static int packet_set_ring(struct sock *sk, struct tpacket_req *req, + int closing, int tx_ring) { char **pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); int was_running, order = 0; + struct packet_ring_buffer *rb; + struct sk_buff_head *rb_queue; __be16 num; - int err = 0; + int err; - if (req->tp_block_nr) { - int i; + rb = tx_ring ? &po->tx_ring : &po->rx_ring; + rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - /* Sanity tests and some calculations */ + err = -EBUSY; + if (!closing) { + if (atomic_read(&po->mapped)) + goto out; + if (atomic_read(&rb->pending)) + goto out; + } - if (unlikely(po->pg_vec)) - return -EBUSY; + if (req->tp_block_nr) { + /* Sanity tests and some calculations */ + err = -EBUSY; + if (unlikely(rb->pg_vec)) + goto out; switch (po->tp_version) { case TPACKET_V1: @@ -1813,42 +2166,35 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing break; } + err = -EINVAL; if (unlikely((int)req->tp_block_size <= 0)) - return -EINVAL; + goto out; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) - return -EINVAL; + goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + - po->tp_reserve)) - return -EINVAL; + po->tp_reserve)) + goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) - return -EINVAL; + goto out; - po->frames_per_block = req->tp_block_size/req->tp_frame_size; - if (unlikely(po->frames_per_block <= 0)) - return -EINVAL; - if (unlikely((po->frames_per_block * req->tp_block_nr) != - req->tp_frame_nr)) - return -EINVAL; + rb->frames_per_block = req->tp_block_size/req->tp_frame_size; + if (unlikely(rb->frames_per_block <= 0)) + goto out; + if (unlikely((rb->frames_per_block * req->tp_block_nr) != + req->tp_frame_nr)) + goto out; err = -ENOMEM; order = get_order(req->tp_block_size); pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; - - for (i = 0; i < req->tp_block_nr; i++) { - void *ptr = pg_vec[i]; - int k; - - for (k = 0; k < po->frames_per_block; k++) { - __packet_set_status(po, ptr, TP_STATUS_KERNEL); - ptr += req->tp_frame_size; - } - } - /* Done */ - } else { + } + /* Done */ + else { + err = -EINVAL; if (unlikely(req->tp_frame_nr)) - return -EINVAL; + goto out; } lock_sock(sk); @@ -1872,23 +2218,24 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (closing || atomic_read(&po->mapped) == 0) { err = 0; #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) - - spin_lock_bh(&sk->sk_receive_queue.lock); - pg_vec = XC(po->pg_vec, pg_vec); - po->frame_max = (req->tp_frame_nr - 1); - po->head = 0; - po->frame_size = req->tp_frame_size; - spin_unlock_bh(&sk->sk_receive_queue.lock); - - order = XC(po->pg_vec_order, order); - req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr); - - po->pg_vec_pages = req->tp_block_size/PAGE_SIZE; - po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv; - skb_queue_purge(&sk->sk_receive_queue); + spin_lock_bh(&rb_queue->lock); + pg_vec = XC(rb->pg_vec, pg_vec); + rb->frame_max = (req->tp_frame_nr - 1); + rb->head = 0; + rb->frame_size = req->tp_frame_size; + spin_unlock_bh(&rb_queue->lock); + + order = XC(rb->pg_vec_order, order); + req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr); + + rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; + po->prot_hook.func = (po->rx_ring.pg_vec) ? + tpacket_rcv : packet_rcv; + skb_queue_purge(rb_queue); #undef XC if (atomic_read(&po->mapped)) - printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); + printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", + atomic_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -1909,11 +2256,13 @@ out: return err; } -static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) +static int packet_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - unsigned long size; + unsigned long size, expected_size; + struct packet_ring_buffer *rb; unsigned long start; int err = -EINVAL; int i; @@ -1921,26 +2270,43 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st if (vma->vm_pgoff) return -EINVAL; - size = vma->vm_end - vma->vm_start; - mutex_lock(&po->pg_vec_lock); - if (po->pg_vec == NULL) + + expected_size = 0; + for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { + if (rb->pg_vec) { + expected_size += rb->pg_vec_len + * rb->pg_vec_pages + * PAGE_SIZE; + } + } + + if (expected_size == 0) goto out; - if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) + + size = vma->vm_end - vma->vm_start; + if (size != expected_size) goto out; start = vma->vm_start; - for (i = 0; i < po->pg_vec_len; i++) { - struct page *page = virt_to_page(po->pg_vec[i]); - int pg_num; - - for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) { - err = vm_insert_page(vma, start, page); - if (unlikely(err)) - goto out; - start += PAGE_SIZE; + for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { + if (rb->pg_vec == NULL) + continue; + + for (i = 0; i < rb->pg_vec_len; i++) { + struct page *page = virt_to_page(rb->pg_vec[i]); + int pg_num; + + for (pg_num = 0; pg_num < rb->pg_vec_pages; + pg_num++,page++) { + err = vm_insert_page(vma, start, page); + if (unlikely(err)) + goto out; + start += PAGE_SIZE; + } } } + atomic_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; -- cgit v1.2.3-70-g09d2 From 690cc3ffe33ac4a2857583c22d4c6244ae11684d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2009 16:55:10 +0000 Subject: syscall: Implement a convinience function restart_syscall Currently when we have a signal pending we have the functionality to restart that the current system call. There are other cases such as nasty lock ordering issues where it makes sense to have a simple fix that uses try lock and restarts the system call. Buying time to figure out how to rework the locking strategy. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..d853f6bb0ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2178,6 +2178,12 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + static inline int signal_pending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -- cgit v1.2.3-70-g09d2 From 93f154b594fe47e4a7e5358b309add449a046cd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 22:19:19 -0700 Subject: net: release dst entry in dev_hard_start_xmit() One point of contention in high network loads is the dst_release() performed when a transmited skb is freed. This is because NIC tx completion calls dev_kree_skb() long after original call to dev_queue_xmit(skb). CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is quite visible if one CPU is 100% handling softirqs for a network device, since dst_clone() is done by other cpus, involving cache line ping pongs. It seems right place to release dst is in dev_hard_start_xmit(), for most devices but ones that are virtual, and some exceptions. David Miller suggested to define a new device flag, set in alloc_netdev_mq() (so that most devices set it at init time), and carefuly unset in devices which dont want a NULL skb->dst in their ndo_start_xmit(). List of devices that must clear this flag is : - loopback device, because it calls netif_rx() and quoting Patrick : "ip_route_input() doesn't accept loopback addresses, so loopback packets already need to have a dst_entry attached." - appletalk/ipddp.c : needs skb->dst in its xmit function - And all devices that call again dev_queue_xmit() from their xmit function (as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/appletalk/ipddp.c | 1 + drivers/net/bonding/bond_main.c | 1 + drivers/net/eql.c | 1 + drivers/net/ifb.c | 1 + drivers/net/loopback.c | 1 + drivers/net/macvlan.c | 1 + drivers/net/wan/hdlc_fr.c | 1 + include/linux/if.h | 3 +++ net/8021q/vlan_dev.c | 1 + net/core/dev.c | 9 +++++++++ 10 files changed, 20 insertions(+) (limited to 'include') diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index da64ba88d7f..f939e92fcf8 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -71,6 +71,7 @@ static struct net_device * __init ipddp_init(void) if (!dev) return ERR_PTR(-ENOMEM); + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; strcpy(dev->name, "ipddp%d"); if (version_printed++ == 0) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 96d7689995c..92a9d69c565 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5148,6 +5148,7 @@ int bond_create(char *name, struct bond_params *params) goto out_rtnl; } + bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; if (!name) { res = dev_alloc_name(bond_dev, "bond%d"); if (res < 0) diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 5210bb1027c..19b7dd98394 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -194,6 +194,7 @@ static void __init eql_setup(struct net_device *dev) dev->type = ARPHRD_SLIP; dev->tx_queue_len = 5; /* Hands them off fast */ + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static int eql_open(struct net_device *dev) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 60a26300193..96713ef0629 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -156,6 +156,7 @@ static void ifb_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; random_ether_addr(dev->dev_addr); } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 6f71157bea8..da472c68748 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -170,6 +170,7 @@ static void loopback_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 329cd50d0e2..d5334b41e4b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -414,6 +414,7 @@ static void macvlan_setup(struct net_device *dev) { ether_setup(dev); + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->netdev_ops = &macvlan_netdev_ops; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 80053010109..bfa0161a02d 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1054,6 +1054,7 @@ static void pvc_setup(struct net_device *dev) dev->flags = IFF_POINTOPOINT; dev->hard_header_len = 10; dev->addr_len = 2; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static const struct net_device_ops pvc_ops = { diff --git a/include/linux/if.h b/include/linux/if.h index 1108f3e099e..b9a6229f3be 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -67,6 +67,9 @@ #define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ #define IFF_MASTER_ARPMON 0x100 /* bonding master, ARP mon in use */ #define IFF_WAN_HDLC 0x200 /* WAN HDLC device */ +#define IFF_XMIT_DST_RELEASE 0x400 /* dev_hard_start_xmit() is allowed to + * release skb->dst + */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8faacee6863..ff7572ac548 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -739,6 +739,7 @@ void vlan_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags |= IFF_802_1Q_VLAN; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; diff --git a/net/core/dev.c b/net/core/dev.c index 6d3630d1627..92ebeca2990 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1688,6 +1688,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } + /* + * If device doesnt need skb->dst, release it right now while + * its hot in this cpu cache + */ + if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) { + dst_release(skb->dst); + skb->dst = NULL; + } rc = ops->ndo_start_xmit(skb, dev); /* * TODO: if skb_orphan() was called by @@ -5045,6 +5053,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); INIT_LIST_HEAD(&dev->napi_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); return dev; -- cgit v1.2.3-70-g09d2 From bfa568d19a3faed3b94978ad48ac15d1b0d7e5bc Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 2 May 2009 06:16:47 +0400 Subject: powerpc/85xx: Add PCI IDs for MPC8569 family processors This patch adds PCI IDs for MPC8569 and MPC8569E processors, plus adds appropriate quirks for these IDs, and thus makes PCI-E actually work on MPC8569E-MDS boards. Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 376603df7c4..94d8b3feb1e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -285,6 +285,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8543, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8547E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8545E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8545, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8569E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8569, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8568E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8568, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8567E, quirk_fsl_pcie_header); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a..7cc5b80327f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2274,6 +2274,8 @@ #define PCI_DEVICE_ID_MPC8547E 0x0018 #define PCI_DEVICE_ID_MPC8545E 0x0019 #define PCI_DEVICE_ID_MPC8545 0x001a +#define PCI_DEVICE_ID_MPC8569E 0x0061 +#define PCI_DEVICE_ID_MPC8569 0x0060 #define PCI_DEVICE_ID_MPC8568E 0x0020 #define PCI_DEVICE_ID_MPC8568 0x0021 #define PCI_DEVICE_ID_MPC8567E 0x0022 -- cgit v1.2.3-70-g09d2 From 01af9507ff36578dad89b1cc88ff37ac18e719cb Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 15 Apr 2009 14:38:40 -0500 Subject: powerpc/85xx: Add P2020DS board support The P2020 is a dual e500v2 core based SOC with: * 3 PCIe controllers * 2 General purpose DMA controllers * 2 sRIO controllers * 3 eTSECS * USB 2.0 * SDHC * SPI, I2C, DUART * enhanced localbus * and optional Security (P2020E) security w/XOR acceleration The p2020 DS reference board is pretty similar to the existing MPC85xx DS boards and has a ULI 1575 connected on one of the PCIe controllers. Signed-off-by: Ted Peters Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/p2020ds.dts | 704 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/mpc85xx_ds.c | 43 +- arch/powerpc/platforms/fsl_uli1575.c | 1 + arch/powerpc/sysdev/fsl_pci.c | 2 + include/linux/pci_ids.h | 2 + 5 files changed, 747 insertions(+), 5 deletions(-) create mode 100644 arch/powerpc/boot/dts/p2020ds.dts (limited to 'include') diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts new file mode 100644 index 00000000000..11019142813 --- /dev/null +++ b/arch/powerpc/boot/dts/p2020ds.dts @@ -0,0 +1,704 @@ +/* + * P2020 DS Device Tree Source + * + * Copyright 2009 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; +/ { + model = "fsl,P2020"; + compatible = "fsl,P2020DS"; + #address-cells = <2>; + #size-cells = <2>; + + aliases { + ethernet0 = &enet0; + ethernet1 = &enet1; + ethernet2 = &enet2; + serial0 = &serial0; + serial1 = &serial1; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,P2020@0 { + device_type = "cpu"; + reg = <0x0>; + next-level-cache = <&L2>; + }; + + PowerPC,P2020@1 { + device_type = "cpu"; + reg = <0x1>; + next-level-cache = <&L2>; + }; + }; + + memory { + device_type = "memory"; + }; + + localbus@ffe05000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,elbc", "simple-bus"; + reg = <0 0xffe05000 0 0x1000>; + interrupts = <19 2>; + interrupt-parent = <&mpic>; + + ranges = <0x0 0x0 0x0 0xe8000000 0x08000000 + 0x1 0x0 0x0 0xe0000000 0x08000000 + 0x2 0x0 0x0 0xffa00000 0x00040000 + 0x3 0x0 0x0 0xffdf0000 0x00008000 + 0x4 0x0 0x0 0xffa40000 0x00040000 + 0x5 0x0 0x0 0xffa80000 0x00040000 + 0x6 0x0 0x0 0xffac0000 0x00040000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + + ramdisk@0 { + reg = <0x0 0x03000000>; + read-only; + }; + + diagnostic@3000000 { + reg = <0x03000000 0x00e00000>; + read-only; + }; + + dink@3e00000 { + reg = <0x03e00000 0x00200000>; + read-only; + }; + + kernel@4000000 { + reg = <0x04000000 0x00400000>; + read-only; + }; + + jffs2@4400000 { + reg = <0x04400000 0x03b00000>; + }; + + dtb@7f00000 { + reg = <0x07f00000 0x00080000>; + read-only; + }; + + u-boot@7f80000 { + reg = <0x07f80000 0x00080000>; + read-only; + }; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,elbc-fcm-nand"; + reg = <0x2 0x0 0x40000>; + + u-boot@0 { + reg = <0x0 0x02000000>; + read-only; + }; + + jffs2@2000000 { + reg = <0x02000000 0x10000000>; + }; + + ramdisk@12000000 { + reg = <0x12000000 0x08000000>; + read-only; + }; + + kernel@1a000000 { + reg = <0x1a000000 0x04000000>; + }; + + dtb@1e000000 { + reg = <0x1e000000 0x01000000>; + read-only; + }; + + empty@1f000000 { + reg = <0x1f000000 0x21000000>; + }; + }; + + nand@4,0 { + compatible = "fsl,elbc-fcm-nand"; + reg = <0x4 0x0 0x40000>; + }; + + nand@5,0 { + compatible = "fsl,elbc-fcm-nand"; + reg = <0x5 0x0 0x40000>; + }; + + nand@6,0 { + compatible = "fsl,elbc-fcm-nand"; + reg = <0x6 0x0 0x40000>; + }; + }; + + soc@ffe00000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,p2020-immr", "simple-bus"; + ranges = <0x0 0 0xffe00000 0x100000>; + bus-frequency = <0>; // Filled out by uboot. + + ecm-law@0 { + compatible = "fsl,ecm-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <12>; + }; + + ecm@1000 { + compatible = "fsl,p2020-ecm", "fsl,ecm"; + reg = <0x1000 0x1000>; + interrupts = <17 2>; + interrupt-parent = <&mpic>; + }; + + memory-controller@2000 { + compatible = "fsl,p2020-memory-controller"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <18 2>; + }; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + spi@7000 { + compatible = "fsl,espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + }; + + dma@c300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0xc300 0x4>; + ranges = <0x0 0xc100 0x200>; + cell-index = <1>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <76 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <77 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <78 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <79 2>; + }; + }; + + gpio: gpio-controller@f000 { + #gpio-cells = <2>; + compatible = "fsl,mpc8572-gpio"; + reg = <0xf000 0x100>; + interrupts = <47 0x2>; + interrupt-parent = <&mpic>; + gpio-controller; + }; + + L2: l2-cache-controller@20000 { + compatible = "fsl,p2020-l2-cache-controller"; + reg = <0x20000 0x1000>; + cache-line-size = <32>; // 32 bytes + cache-size = <0x80000>; // L2, 512k + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + dma@21300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0x21300 0x4>; + ranges = <0x0 0x21100 0x200>; + cell-index = <0>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <20 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <21 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <22 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <23 2>; + }; + }; + + usb@22000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x22000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <28 0x2>; + phy_type = "ulpi"; + }; + + enet0: ethernet@24000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <0>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x24000 0x1000>; + ranges = <0x0 0x24000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <29 2 30 2 34 2>; + interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; + phy-handle = <&phy0>; + phy-connection-type = "rgmii-id"; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-mdio"; + reg = <0x520 0x20>; + + phy0: ethernet-phy@0 { + interrupt-parent = <&mpic>; + interrupts = <3 1>; + reg = <0x0>; + }; + phy1: ethernet-phy@1 { + interrupt-parent = <&mpic>; + interrupts = <3 1>; + reg = <0x1>; + }; + phy2: ethernet-phy@2 { + interrupt-parent = <&mpic>; + interrupts = <3 1>; + reg = <0x2>; + }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x25000 0x1000>; + ranges = <0x0 0x25000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <35 2 36 2 40 2>; + interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; + phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + enet2: ethernet@26000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <2>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x26000 0x1000>; + ranges = <0x0 0x26000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <31 2 32 2 33 2>; + interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; + phy-handle = <&phy2>; + phy-connection-type = "rgmii-id"; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + sdhci@2e000 { + compatible = "fsl,p2020-esdhc", "fsl,esdhc"; + reg = <0x2e000 0x1000>; + interrupts = <72 0x2>; + interrupt-parent = <&mpic>; + /* Filled in by U-Boot */ + clock-frequency = <0>; + }; + + crypto@30000 { + compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", + "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; + reg = <0x30000 0x10000>; + interrupts = <45 2 58 2>; + interrupt-parent = <&mpic>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0xbfe>; + fsl,descriptor-types-mask = <0x3ab0ebf>; + }; + + mpic: pic@40000 { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + }; + + msi@41600 { + compatible = "fsl,mpic-msi"; + reg = <0x41600 0x80>; + msi-available-ranges = <0 0x100>; + interrupts = < + 0xe0 0 + 0xe1 0 + 0xe2 0 + 0xe3 0 + 0xe4 0 + 0xe5 0 + 0xe6 0 + 0xe7 0>; + interrupt-parent = <&mpic>; + }; + + global-utilities@e0000 { //global utilities block + compatible = "fsl,p2020-guts"; + reg = <0xe0000 0x1000>; + fsl,has-rstcr; + }; + }; + + pci0: pcie@ffe08000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe08000 0 0x1000>; + bus-range = <0 255>; + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <24 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x8 0x1 + 0000 0x0 0x0 0x2 &mpic 0x9 0x1 + 0000 0x0 0x0 0x3 &mpic 0xa 0x1 + 0000 0x0 0x0 0x4 &mpic 0xb 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + }; + }; + + pci1: pcie@ffe09000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe09000 0 0x1000>; + bus-range = <0 255>; + ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <25 2>; + interrupt-map-mask = <0xff00 0x0 0x0 0x7>; + interrupt-map = < + + // IDSEL 0x11 func 0 - PCI slot 1 + 0x8800 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8800 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 1 - PCI slot 1 + 0x8900 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8900 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 2 - PCI slot 1 + 0x8a00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8a00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 3 - PCI slot 1 + 0x8b00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8b00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 4 - PCI slot 1 + 0x8c00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8c00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 5 - PCI slot 1 + 0x8d00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8d00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 6 - PCI slot 1 + 0x8e00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8e00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x11 func 7 - PCI slot 1 + 0x8f00 0x0 0x0 0x1 &i8259 0x9 0x2 + 0x8f00 0x0 0x0 0x2 &i8259 0xa 0x2 + + // IDSEL 0x1d Audio + 0xe800 0x0 0x0 0x1 &i8259 0x6 0x2 + + // IDSEL 0x1e Legacy + 0xf000 0x0 0x0 0x1 &i8259 0x7 0x2 + 0xf100 0x0 0x0 0x1 &i8259 0x7 0x2 + + // IDSEL 0x1f IDE/SATA + 0xf800 0x0 0x0 0x1 &i8259 0xe 0x2 + 0xf900 0x0 0x0 0x1 &i8259 0x5 0x2 + >; + + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0xa0000000 + 0x2000000 0x0 0xa0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + uli1575@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + ranges = <0x2000000 0x0 0xa0000000 + 0x2000000 0x0 0xa0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + isa@1e { + device_type = "isa"; + #interrupt-cells = <2>; + #size-cells = <1>; + #address-cells = <2>; + reg = <0xf000 0x0 0x0 0x0 0x0>; + ranges = <0x1 0x0 0x1000000 0x0 0x0 + 0x1000>; + interrupt-parent = <&i8259>; + + i8259: interrupt-controller@20 { + reg = <0x1 0x20 0x2 + 0x1 0xa0 0x2 + 0x1 0x4d0 0x2>; + interrupt-controller; + device_type = "interrupt-controller"; + #address-cells = <0>; + #interrupt-cells = <2>; + compatible = "chrp,iic"; + interrupts = <4 1>; + interrupt-parent = <&mpic>; + }; + + i8042@60 { + #size-cells = <0>; + #address-cells = <1>; + reg = <0x1 0x60 0x1 0x1 0x64 0x1>; + interrupts = <1 3 12 3>; + interrupt-parent = + <&i8259>; + + keyboard@0 { + reg = <0x0>; + compatible = "pnpPNP,303"; + }; + + mouse@1 { + reg = <0x1>; + compatible = "pnpPNP,f03"; + }; + }; + + rtc@70 { + compatible = "pnpPNP,b00"; + reg = <0x1 0x70 0x2>; + }; + + gpio@400 { + reg = <0x1 0x400 0x80>; + }; + }; + }; + }; + + }; + + pci2: pcie@ffe0a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe0a000 0 0x1000>; + bus-range = <0 255>; + ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <26 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0xc0000000 + 0x2000000 0x0 0xc0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + }; + }; +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index de66de7a9ca..53d5851a6c9 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -163,7 +163,8 @@ static void __init mpc85xx_ds_setup_arch(void) #ifdef CONFIG_PCI for_each_node_by_type(np, "pci") { if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { + of_device_is_compatible(np, "fsl,mpc8548-pcie") || + of_device_is_compatible(np, "fsl,p2020-pcie")) { struct resource rsrc; of_address_to_resource(np, 0, &rsrc); if ((rsrc.start & 0xfffff) == primary_phb_addr) @@ -195,9 +196,9 @@ static int __init mpc8544_ds_probe(void) primary_phb_addr = 0xb000; #endif return 1; - } else { - return 0; } + + return 0; } static struct of_device_id __initdata mpc85xxds_ids[] = { @@ -214,6 +215,7 @@ static int __init mpc85xxds_publish_devices(void) } machine_device_initcall(mpc8544_ds, mpc85xxds_publish_devices); machine_device_initcall(mpc8572_ds, mpc85xxds_publish_devices); +machine_device_initcall(p2020_ds, mpc85xxds_publish_devices); /* * Called very early, device-tree isn't unflattened @@ -227,9 +229,26 @@ static int __init mpc8572_ds_probe(void) primary_phb_addr = 0x8000; #endif return 1; - } else { - return 0; } + + return 0; +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p2020_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P2020DS")) { +#ifdef CONFIG_PCI + primary_phb_addr = 0x9000; +#endif + return 1; + } + + return 0; } define_machine(mpc8544_ds) { @@ -259,3 +278,17 @@ define_machine(mpc8572_ds) { .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, }; + +define_machine(p2020_ds) { + .name = "P2020 DS", + .probe = p2020_ds_probe, + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c index efd41f40984..fd23a1d4b39 100644 --- a/arch/powerpc/platforms/fsl_uli1575.c +++ b/arch/powerpc/platforms/fsl_uli1575.c @@ -55,6 +55,7 @@ static inline bool is_quirk_valid(void) { return (machine_is(mpc86xx_hpcn) || machine_is(mpc8544_ds) || + machine_is(p2020_ds) || machine_is(mpc8572_ds)); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 94d8b3feb1e..b20171d9df0 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -302,6 +302,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8536, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641D, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8610, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header); #endif /* CONFIG_PPC_85xx || CONFIG_PPC_86xx */ #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7cc5b80327f..bfa0402d343 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2288,6 +2288,8 @@ #define PCI_DEVICE_ID_MPC8572 0x0041 #define PCI_DEVICE_ID_MPC8536E 0x0050 #define PCI_DEVICE_ID_MPC8536 0x0051 +#define PCI_DEVICE_ID_P2020E 0x0070 +#define PCI_DEVICE_ID_P2020 0x0071 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 #define PCI_DEVICE_ID_MPC8610 0x7018 -- cgit v1.2.3-70-g09d2 From 4200efd9acda4accf24640f1e77d24fdcdb524df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 May 2009 09:22:19 +0200 Subject: sched: properly define the sched_group::cpumask and sched_domain::span fields Properly document the variable-size structure tricks we are doing wrt. struct sched_group and sched_domain, and use the field[0] GCC extension instead of defining a vla array. Dont use unions for this, as pointed out by Linus. [ Impact: cleanup, un-confuse Sparse and LLVM ] Reported-by: Jeff Garzik Acked-by: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 25 ++++++++++++++++++++++--- kernel/sched.c | 5 +++-- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index de7b3b21777..dbb1043e865 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -839,7 +839,17 @@ struct sched_group { */ u32 reciprocal_cpu_power; - unsigned long cpumask[]; + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_group' in kernel/sched.c) + */ + unsigned long cpumask[0]; }; static inline struct cpumask *sched_group_cpus(struct sched_group *sg) @@ -925,8 +935,17 @@ struct sched_domain { char *name; #endif - /* span of all CPUs in this domain */ - unsigned long span[]; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_domain' in kernel/sched.c) + */ + unsigned long span[0]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) diff --git a/kernel/sched.c b/kernel/sched.c index 497c09ba61e..228acae8821 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7948,8 +7948,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; /* * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. + * + * ( See the the comments in include/linux/sched.h:struct sched_group + * and struct sched_domain. ) */ struct static_sched_group { struct sched_group sg; -- cgit v1.2.3-70-g09d2 From 79eb63e9e5875b84341a3a05f8e6ae9cdb4bb6f6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 17 May 2009 18:57:15 +0300 Subject: block: Add blk_make_request(), takes bio, returns a request New block API: given a struct bio allocates a new request. This is the parallel of generic_make_request for BLOCK_PC commands users. The passed bio may be a chained-bio. The bio is bounced if needed inside the call to this member. This is in the effort of un-exporting blk_rq_append_bio(). Signed-off-by: Boaz Harrosh CC: Jeff Garzik Signed-off-by: Jens Axboe --- block/blk-core.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index e3f7e6a3a09..bec1d69952d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -890,6 +890,51 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); +/** + * blk_make_request - given a bio, allocate a corresponding struct request. + * + * @bio: The bio describing the memory mappings that will be submitted for IO. + * It may be a chained-bio properly constructed by block/bio layer. + * + * blk_make_request is the parallel of generic_make_request for BLOCK_PC + * type commands. Where the struct request needs to be farther initialized by + * the caller. It is passed a &struct bio, which describes the memory info of + * the I/O transfer. + * + * The caller of blk_make_request must make sure that bi_io_vec + * are set to describe the memory buffers. That bio_data_dir() will return + * the needed direction of the request. (And all bio's in the passed bio-chain + * are properly set accordingly) + * + * If called under none-sleepable conditions, mapped bio buffers must not + * need bouncing, by calling the appropriate masked or flagged allocator, + * suitable for the target device. Otherwise the call to blk_queue_bounce will + * BUG. + */ +struct request *blk_make_request(struct request_queue *q, struct bio *bio, + gfp_t gfp_mask) +{ + struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); + + if (unlikely(!rq)) + return ERR_PTR(-ENOMEM); + + for_each_bio(bio) { + struct bio *bounce_bio = bio; + int ret; + + blk_queue_bounce(q, &bounce_bio); + ret = blk_rq_append_bio(q, rq, bounce_bio); + if (unlikely(ret)) { + blk_put_request(rq); + return ERR_PTR(ret); + } + } + + return rq; +} +EXPORT_SYMBOL(blk_make_request); + /** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9d60a78c08..88a83e112c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -740,6 +740,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); +extern struct request *blk_make_request(struct request_queue *, struct bio *, + gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); -- cgit v1.2.3-70-g09d2 From a411f4bbb89f1f08687b344064d6775bce1e4658 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 17 May 2009 19:00:01 +0300 Subject: block: Un-export blk_rq_append_bio OSD was the last in-tree user of blk_rq_append_bio(). Now that it is fixed blk_rq_append_bio is un-exported and is only used internally by block layer. Signed-off-by: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-map.c | 1 - block/blk.h | 2 ++ include/linux/blkdev.h | 6 ------ 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/blk-map.c b/block/blk-map.c index caa05a66774..ef2492adca7 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -24,7 +24,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, } return 0; } -EXPORT_SYMBOL(blk_rq_append_bio); static int __blk_rq_unmap_user(struct bio *bio) { diff --git a/block/blk.h b/block/blk.h index 9e0042ca949..c863ec2281e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -13,6 +13,8 @@ extern struct kobj_type blk_queue_ktype; void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); +int blk_rq_append_bio(struct request_queue *q, struct request *rq, + struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88a83e112c9..564445be7a6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -757,12 +757,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -/* - * Temporary export, until SCSI gets fixed up. - */ -extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3-70-g09d2 From c44d70a340554a33071339064a303ac0f1a31623 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 17 May 2009 11:24:08 +0200 Subject: perf_counter: fix counter inheritance race Context rotation should not occur when we are in the middle of walking the counter list when inheriting counters ... [ Impact: fix occasionally incorrect perf stat results ] Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: Marcelo Tosatti Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c8c1dfc22c9..13cb2fbbf33 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -508,6 +508,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + int rr_allowed; struct task_struct *task; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7af16d1c480..4d8f97375f3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1120,7 +1120,8 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) __perf_counter_task_sched_out(ctx); rotate_ctx(&cpuctx->ctx); - rotate_ctx(ctx); + if (ctx->rr_allowed) + rotate_ctx(ctx); perf_counter_cpu_sched_in(cpuctx, cpu); perf_counter_task_sched_in(curr, cpu); @@ -3108,6 +3109,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); INIT_LIST_HEAD(&ctx->event_list); + ctx->rr_allowed = 1; ctx->task = task; } @@ -3348,6 +3350,9 @@ void perf_counter_init_task(struct task_struct *child) */ mutex_lock(&parent_ctx->mutex); + parent_ctx->rr_allowed = 0; + barrier(); /* irqs */ + /* * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: @@ -3361,6 +3366,9 @@ void perf_counter_init_task(struct task_struct *child) break; } + barrier(); /* irqs */ + parent_ctx->rr_allowed = 1; + mutex_unlock(&parent_ctx->mutex); } -- cgit v1.2.3-70-g09d2 From 645069299a1c7358cf7330afe293f07552f11a5d Mon Sep 17 00:00:00 2001 From: Sascha Hlusiak Date: Tue, 19 May 2009 12:56:52 +0000 Subject: sit: stateless autoconf for isatap be sent periodically. The rs_delay can be speficied when adding the PRL entry and defaults to 15 minutes. The RS is sent from every link local adress that's assigned to the tunnel interface. It's directed to the (guessed) linklocal address of the router and is sent through the tunnel. Better: send to ff02::2 encapsuled in unicast directed to router-v4. Signed-off-by: Sascha Hlusiak Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 2 +- include/net/ipip.h | 7 ++++++ net/ipv6/ndisc.c | 1 + net/ipv6/sit.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5a9aae4adb4..5eb9b0f857e 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -44,7 +44,7 @@ struct ip_tunnel_prl { __u16 flags; __u16 __reserved; __u32 datalen; - __u32 __reserved2; + __u32 rs_delay; /* data follows */ }; diff --git a/include/net/ipip.h b/include/net/ipip.h index fdf9bd74370..5d3036fa151 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -28,11 +28,18 @@ struct ip_tunnel unsigned int prl_count; /* # of entries in PRL */ }; +/* ISATAP: default interval between RS in secondy */ +#define IPTUNNEL_RS_DEFAULT_DELAY (900) + struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry *next; __be32 addr; u16 flags; + unsigned long rs_delay; + struct timer_list rs_timer; + struct ip_tunnel *tunnel; + spinlock_t lock; }; #define IPTUNNEL_XMIT() do { \ diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ab65cc51b00..e09f12ee57c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -658,6 +658,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, &icmp6h, NULL, send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); } +EXPORT_SYMBOL(ndisc_send_rs); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3fd060076e7..b3a59bd40f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -15,6 +15,7 @@ * Roger Venning : 6to4 support * Nate Thompson : 6to4 support * Fred Templin : isatap support + * Sascha Hlusiak : stateless autoconf for isatap */ #include @@ -222,6 +223,44 @@ failed: return NULL; } +static void ipip6_tunnel_rs_timer(unsigned long data) +{ + struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *) data; + struct inet6_dev *ifp; + struct inet6_ifaddr *addr; + + spin_lock(&p->lock); + ifp = __in6_dev_get(p->tunnel->dev); + + read_lock_bh(&ifp->lock); + for (addr = ifp->addr_list; addr; addr = addr->if_next) { + struct in6_addr rtr; + + if (!(ipv6_addr_type(&addr->addr) & IPV6_ADDR_LINKLOCAL)) + continue; + + /* Send RS to guessed linklocal address of router + * + * Better: send to ff02::2 encapsuled in unicast directly + * to router-v4 instead of guessing the v6 address. + * + * Cisco/Windows seem to not set the u/l bit correctly, + * so we won't guess right. + */ + ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0); + if (!__ipv6_isatap_ifid(rtr.s6_addr + 8, + p->addr)) { + ndisc_send_rs(p->tunnel->dev, &addr->addr, &rtr); + } + } + read_unlock_bh(&ifp->lock); + + mod_timer(&p->rs_timer, jiffies + HZ * p->rs_delay); + spin_unlock(&p->lock); + + return; +} + static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { @@ -280,6 +319,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; + kp[c].rs_delay = prl->rs_delay; c++; if (kprl.addr != htonl(INADDR_ANY)) break; @@ -329,11 +369,23 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) } p->next = t->prl; + p->tunnel = t; t->prl = p; t->prl_count++; + + spin_lock_init(&p->lock); + setup_timer(&p->rs_timer, ipip6_tunnel_rs_timer, (unsigned long) p); update: p->addr = a->addr; p->flags = a->flags; + p->rs_delay = a->rs_delay; + if (p->rs_delay == 0) + p->rs_delay = IPTUNNEL_RS_DEFAULT_DELAY; + spin_lock(&p->lock); + del_timer(&p->rs_timer); + if (p->flags & PRL_DEFAULT) + mod_timer(&p->rs_timer, jiffies + 1); + spin_unlock(&p->lock); out: write_unlock(&ipip6_lock); return err; @@ -352,6 +404,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) if ((*p)->addr == a->addr) { x = *p; *p = x->next; + spin_lock(&x->lock); + del_timer(&x->rs_timer); + spin_unlock(&x->lock); kfree(x); t->prl_count--; goto out; @@ -362,6 +417,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) while (t->prl) { x = t->prl; t->prl = t->prl->next; + spin_lock(&x->lock); + del_timer(&x->rs_timer); + spin_unlock(&x->lock); kfree(x); t->prl_count--; } -- cgit v1.2.3-70-g09d2 From 86579a4cccf18a2ddbf7de8fc5a0f5d9b94ed76d Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 18 May 2009 16:04:44 -0700 Subject: Input: ads7846 - support swapping x and y axes Signed-off-by: Michael Roth Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 7 +++++++ include/linux/spi/ads7846.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 2b01e56568f..b5ad252f5cf 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -97,6 +97,8 @@ struct ads7846 { u16 x_plate_ohms; u16 pressure_max; + bool swap_xy; + struct ads7846_packet *packet; struct spi_transfer xfer[18]; @@ -599,6 +601,10 @@ static void ads7846_rx(void *ads) dev_dbg(&ts->spi->dev, "DOWN\n"); #endif } + + if (ts->swap_xy) + swap(x, y); + input_report_abs(input, ABS_X, x); input_report_abs(input, ABS_Y, y); input_report_abs(input, ABS_PRESSURE, Rt); @@ -917,6 +923,7 @@ static int __devinit ads7846_probe(struct spi_device *spi) ts->spi = spi; ts->input = input_dev; ts->vref_mv = pdata->vref_mv; + ts->swap_xy = pdata->swap_xy; hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ts->timer.function = ads7846_timer; diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 2ea20320c09..51948eb6927 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -17,6 +17,7 @@ struct ads7846_platform_data { u16 vref_mv; /* external vref value, milliVolts */ bool keep_vref_on; /* set to keep vref on for differential * measurements as well */ + bool swap_xy; /* swap x and y axes */ /* Settling time of the analog signals; a function of Vcc and the * capacitance on the X/Y drivers. If set to non-zero, two samples -- cgit v1.2.3-70-g09d2 From 0a7ae2ff0d29bb3b327edff4c8ab67b3834fa811 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 May 2009 08:54:31 +0200 Subject: block: change the tag sync vs async restriction logic Make them fully share the tag space, but disallow async requests using the last any two slots. Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 2 +- block/blk-tag.c | 15 +++++++++------ block/elevator.c | 8 ++++---- include/linux/blkdev.h | 7 ++++++- 5 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 0ab81a0a750..0d98054cdbd 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -218,7 +218,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight) + if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; diff --git a/block/blk-core.c b/block/blk-core.c index 49065075d46..1c748403882 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1815,7 +1815,7 @@ void blk_dequeue_request(struct request *rq) * the driver side. */ if (blk_account_rq(rq)) - q->in_flight++; + q->in_flight[rq_is_sync(rq)]++; } /** diff --git a/block/blk-tag.c b/block/blk-tag.c index c260f7c30dd..2e5cfeb5933 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -336,7 +336,7 @@ EXPORT_SYMBOL(blk_queue_end_tag); int blk_queue_start_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; - unsigned max_depth, offset; + unsigned max_depth; int tag; if (unlikely((rq->cmd_flags & REQ_QUEUED))) { @@ -355,13 +355,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) * to starve sync IO on behalf of flooding async IO. */ max_depth = bqt->max_depth; - if (rq_is_sync(rq)) - offset = 0; - else - offset = max_depth >> 2; + if (!rq_is_sync(rq) && max_depth > 1) { + max_depth -= 2; + if (!max_depth) + max_depth = 1; + if (q->in_flight[0] > max_depth) + return 1; + } do { - tag = find_next_zero_bit(bqt->tag_map, max_depth, offset); + tag = find_first_zero_bit(bqt->tag_map, max_depth); if (tag >= max_depth) return 1; diff --git a/block/elevator.c b/block/elevator.c index 918920056e4..ebee948293e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -546,7 +546,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) * in_flight count again */ if (blk_account_rq(rq)) { - q->in_flight--; + q->in_flight[rq_is_sync(rq)]--; if (blk_sorted_rq(rq)) elv_deactivate_rq(q, rq); } @@ -685,7 +685,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) if (unplug_it && blk_queue_plugged(q)) { int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] - - q->in_flight; + - queue_in_flight(q); if (nrq >= q->unplug_thresh) __generic_unplug_device(q); @@ -823,7 +823,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) * request is released from the driver, io must be done */ if (blk_account_rq(rq)) { - q->in_flight--; + q->in_flight[rq_is_sync(rq)]--; if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } @@ -838,7 +838,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) if (!list_empty(&q->queue_head)) next = list_entry_rq(q->queue_head.next); - if (!q->in_flight && + if (!queue_in_flight(q) && blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 564445be7a6..a967dd775db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -404,7 +404,7 @@ struct request_queue struct list_head tag_busy_list; unsigned int nr_sorted; - unsigned int in_flight; + unsigned int in_flight[2]; unsigned int rq_timeout; struct timer_list timeout; @@ -511,6 +511,11 @@ static inline void queue_flag_clear_unlocked(unsigned int flag, __clear_bit(flag, &q->queue_flags); } +static inline int queue_in_flight(struct request_queue *q) +{ + return q->in_flight[0] + q->in_flight[1]; +} + static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); -- cgit v1.2.3-70-g09d2 From d7b629a34fc4134a43c730b5f0197855dc4948d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 May 2009 12:21:19 +0200 Subject: perf_counter: Solve the rotate_ctx vs inherit race differently Instead of disabling RR scheduling of the counters, use a different list that does not get rotated to iterate the counters on inheritance. [ Impact: cleanup, optimization ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Marcelo Tosatti Cc: John Kacur LKML-Reference: <20090520102553.237504544@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - kernel/perf_counter.c | 15 +++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 13cb2fbbf33..c8c1dfc22c9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -508,7 +508,6 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; - int rr_allowed; struct task_struct *task; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4d8f97375f3..64113e6d194 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1120,8 +1120,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) __perf_counter_task_sched_out(ctx); rotate_ctx(&cpuctx->ctx); - if (ctx->rr_allowed) - rotate_ctx(ctx); + rotate_ctx(ctx); perf_counter_cpu_sched_in(cpuctx, cpu); perf_counter_task_sched_in(curr, cpu); @@ -3109,7 +3108,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx, mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); INIT_LIST_HEAD(&ctx->event_list); - ctx->rr_allowed = 1; ctx->task = task; } @@ -3350,14 +3348,14 @@ void perf_counter_init_task(struct task_struct *child) */ mutex_lock(&parent_ctx->mutex); - parent_ctx->rr_allowed = 0; - barrier(); /* irqs */ - /* * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ - list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { + list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { + if (counter != counter->group_leader) + continue; + if (!counter->hw_event.inherit) continue; @@ -3366,9 +3364,6 @@ void perf_counter_init_task(struct task_struct *child) break; } - barrier(); /* irqs */ - parent_ctx->rr_allowed = 1; - mutex_unlock(&parent_ctx->mutex); } -- cgit v1.2.3-70-g09d2 From 26b119bc811a73bac6ecf95bdf284bf31c7955f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 May 2009 12:21:20 +0200 Subject: perf_counter: Log irq_period changes For the dynamic irq_period code, log whenever we change the period so that analyzing code can normalize the event flow. [ Impact: add new feature to allow more precise profiling ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Marcelo Tosatti Cc: John Kacur LKML-Reference: <20090520102553.298769743@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 ++++++++ kernel/perf_counter.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c8c1dfc22c9..f612941ef46 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -257,6 +257,14 @@ enum perf_event_type { */ PERF_EVENT_COMM = 3, + /* + * struct { + * struct perf_event_header header; + * u64 irq_period; + * }; + */ + PERF_EVENT_PERIOD = 4, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 64113e6d194..db02eb16c77 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1046,7 +1046,9 @@ int perf_counter_task_enable(void) return 0; } -void perf_adjust_freq(struct perf_counter_context *ctx) +static void perf_log_period(struct perf_counter *counter, u64 period); + +static void perf_adjust_freq(struct perf_counter_context *ctx) { struct perf_counter *counter; u64 irq_period; @@ -1072,6 +1074,8 @@ void perf_adjust_freq(struct perf_counter_context *ctx) if (!irq_period) irq_period = 1; + perf_log_period(counter, irq_period); + counter->hw.irq_period = irq_period; counter->hw.interrupts = 0; } @@ -2406,6 +2410,40 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, perf_counter_mmap_event(&mmap_event); } +/* + * + */ + +static void perf_log_period(struct perf_counter *counter, u64 period) +{ + struct perf_output_handle handle; + int ret; + + struct { + struct perf_event_header header; + u64 time; + u64 period; + } freq_event = { + .header = { + .type = PERF_EVENT_PERIOD, + .misc = 0, + .size = sizeof(freq_event), + }, + .time = sched_clock(), + .period = period, + }; + + if (counter->hw.irq_period == period) + return; + + ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0); + if (ret) + return; + + perf_output_put(&handle, freq_event); + perf_output_end(&handle); +} + /* * Generic counter overflow handling. */ -- cgit v1.2.3-70-g09d2 From 89f536ccfa8b370ff4d054f4061858ca9322c25a Mon Sep 17 00:00:00 2001 From: Stephane Chatty Date: Wed, 20 May 2009 15:41:24 +0200 Subject: HID: add new multitouch and digitizer contants Added constants to hid.h for all digitizer usages (including the new multitouch ones that are not yet in the official USB spec but are being pushed by Microsft as described in their paper "Digitizer Drivers for Windows Touch and Pen-Based Computers"). Updated hid-debug.c to support the new MT input constants such as ABS_MT_POSITION_X. Signed-off-by: Stephane Chatty Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 23 +++++++++++++++++++++-- include/linux/hid.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 47ac1a7d66e..04359ed64b8 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -137,6 +137,14 @@ static const struct hid_usage_entry hid_usage_table[] = { {0, 0x44, "BarrelSwitch"}, {0, 0x45, "Eraser"}, {0, 0x46, "TabletPick"}, + {0, 0x47, "Confidence"}, + {0, 0x48, "Width"}, + {0, 0x49, "Height"}, + {0, 0x51, "ContactID"}, + {0, 0x52, "InputMode"}, + {0, 0x53, "DeviceIndex"}, + {0, 0x54, "ContactCount"}, + {0, 0x55, "ContactMaximumNumber"}, { 15, 0, "PhysicalInterfaceDevice" }, {0, 0x00, "Undefined"}, {0, 0x01, "Physical_Interface_Device"}, @@ -514,9 +522,11 @@ static const char *events[EV_MAX + 1] = { [EV_FF_STATUS] = "ForceFeedbackStatus", }; -static const char *syncs[2] = { +static const char *syncs[3] = { [SYN_REPORT] = "Report", [SYN_CONFIG] = "Config", + [SYN_MT_REPORT] = "MT Report", }; + static const char *keys[KEY_MAX + 1] = { [KEY_RESERVED] = "Reserved", [KEY_ESC] = "Esc", [KEY_1] = "1", [KEY_2] = "2", @@ -734,8 +744,17 @@ static const char *absolutes[ABS_MAX + 1] = { [ABS_HAT2Y] = "Hat2Y", [ABS_HAT3X] = "Hat3X", [ABS_HAT3Y] = "Hat 3Y", [ABS_PRESSURE] = "Pressure", [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", - [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "Tool Width", + [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_MISC] = "Misc", + [ABS_MT_TOUCH_MAJOR] = "MTMajor", + [ABS_MT_TOUCH_MINOR] = "MTMinor", + [ABS_MT_WIDTH_MAJOR] = "MTMajorW", + [ABS_MT_WIDTH_MINOR] = "MTMinorW", + [ABS_MT_ORIENTATION] = "MTOrientation", + [ABS_MT_POSITION_X] = "MTPositionX", + [ABS_MT_POSITION_Y] = "MTPositionY", + [ABS_MT_TOOL_TYPE] = "MTToolType", + [ABS_MT_BLOB_ID] = "MTBlobID", }; static const char *misc[MSC_MAX + 1] = { diff --git a/include/linux/hid.h b/include/linux/hid.h index a72876e4358..53489fd4d70 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -238,6 +238,42 @@ struct hid_item { #define HID_GD_RIGHT 0x00010092 #define HID_GD_LEFT 0x00010093 +#define HID_DG_DIGITIZER 0x000d0001 +#define HID_DG_PEN 0x000d0002 +#define HID_DG_LIGHTPEN 0x000d0003 +#define HID_DG_TOUCHSCREEN 0x000d0004 +#define HID_DG_TOUCHPAD 0x000d0005 +#define HID_DG_STYLUS 0x000d0020 +#define HID_DG_PUCK 0x000d0021 +#define HID_DG_FINGER 0x000d0022 +#define HID_DG_TIPPRESSURE 0x000d0030 +#define HID_DG_BARRELPRESSURE 0x000d0031 +#define HID_DG_INRANGE 0x000d0032 +#define HID_DG_TOUCH 0x000d0033 +#define HID_DG_UNTOUCH 0x000d0034 +#define HID_DG_TAP 0x000d0035 +#define HID_DG_TABLETFUNCTIONKEY 0x000d0039 +#define HID_DG_PROGRAMCHANGEKEY 0x000d003a +#define HID_DG_INVERT 0x000d003c +#define HID_DG_TIPSWITCH 0x000d0042 +#define HID_DG_TIPSWITCH2 0x000d0043 +#define HID_DG_BARRELSWITCH 0x000d0044 +#define HID_DG_ERASER 0x000d0045 +#define HID_DG_TABLETPICK 0x000d0046 +/* + * as of May 20, 2009 the usages below are not yet in the official USB spec + * but are being pushed by Microsft as described in their paper "Digitizer + * Drivers for Windows Touch and Pen-Based Computers" + */ +#define HID_DG_CONFIDENCE 0x000d0047 +#define HID_DG_WIDTH 0x000d0048 +#define HID_DG_HEIGHT 0x000d0049 +#define HID_DG_CONTACTID 0x000d0051 +#define HID_DG_INPUTMODE 0x000d0052 +#define HID_DG_DEVICEINDEX 0x000d0053 +#define HID_DG_CONTACTCOUNT 0x000d0054 +#define HID_DG_CONTACTMAX 0x000d0055 + /* * HID report types --- Ouch! HID spec says 1 2 3! */ -- cgit v1.2.3-70-g09d2 From 038659e7c6b385065cb223872771ac437ef70b62 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 2 May 2009 00:37:17 -0400 Subject: cfg80211: Process regulatory max bandwidth checks for HT40 We are not correctly listening to the regulatory max bandwidth settings. To actually make use of it we need to redesign things a bit. This patch does the work for that. We do this to so we can obey to regulatory rules accordingly for use of HT40. We end up dealing with HT40 by having two passes for each channel. The first check will see if a 20 MHz channel fits into the channel's center freq on a given frequency range. We check for a 20 MHz banwidth channel as that is the maximum an individual channel will use, at least for now. The first pass will go ahead and check if the regulatory rule for that given center of frequency allows 40 MHz bandwidths and we use this to determine whether or not the channel supports HT40 or not. So to support HT40 you'll need at a regulatory rule that allows you to use 40 MHz channels but you're channel must also be enabled and support 20 MHz by itself. The second pass is done after we do the regulatory checks over an device's supported channel list. On each channel we'll check if the control channel and the extension both: o exist o are enabled o regulatory allows 40 MHz bandwidth on its frequency range This work allows allows us to idependently check for HT40- and HT40+. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/ath/regd.c | 10 +- include/net/cfg80211.h | 14 ++- net/wireless/reg.c | 201 +++++++++++++++++++++++++++++++--------- 3 files changed, 175 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index fdf07c82208..7a89f9fac7d 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -200,8 +200,10 @@ ath_reg_apply_beaconing_flags(struct wiphy *wiphy, continue; if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { - r = freq_reg_info(wiphy, ch->center_freq, - &bandwidth, ®_rule); + r = freq_reg_info(wiphy, + ch->center_freq, + bandwidth, + ®_rule); if (r) continue; /* @@ -265,7 +267,7 @@ ath_reg_apply_active_scan_flags(struct wiphy *wiphy, */ ch = &sband->channels[11]; /* CH 12 */ - r = freq_reg_info(wiphy, ch->center_freq, &bandwidth, ®_rule); + r = freq_reg_info(wiphy, ch->center_freq, bandwidth, ®_rule); if (!r) { if (!(reg_rule->flags & NL80211_RRF_PASSIVE_SCAN)) if (ch->flags & IEEE80211_CHAN_PASSIVE_SCAN) @@ -273,7 +275,7 @@ ath_reg_apply_active_scan_flags(struct wiphy *wiphy, } ch = &sband->channels[12]; /* CH 13 */ - r = freq_reg_info(wiphy, ch->center_freq, &bandwidth, ®_rule); + r = freq_reg_info(wiphy, ch->center_freq, bandwidth, ®_rule); if (!r) { if (!(reg_rule->flags & NL80211_RRF_PASSIVE_SCAN)) if (ch->flags & IEEE80211_CHAN_PASSIVE_SCAN) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9e17a83d343..282d58d52fc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -70,6 +70,9 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_FAT_BELOW = 1<<5, }; +#define IEEE80211_CHAN_NO_HT40 \ + (IEEE80211_CHAN_NO_FAT_ABOVE | IEEE80211_CHAN_NO_FAT_BELOW) + /** * struct ieee80211_channel - channel definition * @@ -1303,9 +1306,10 @@ extern void wiphy_apply_custom_regulatory( * freq_reg_info - get regulatory information for the given frequency * @wiphy: the wiphy for which we want to process this rule for * @center_freq: Frequency in KHz for which we want regulatory information for - * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one - * you can set this to 0. If this frequency is allowed we then set - * this value to the maximum allowed bandwidth. + * @desired_bw_khz: the desired max bandwidth you want to use per + * channel. Note that this is still 20 MHz if you want to use HT40 + * as HT40 makes use of two channels for its 40 MHz width bandwidth. + * If set to 0 we'll assume you want the standard 20 MHz. * @reg_rule: the regulatory rule which we have for this frequency * * Use this function to get the regulatory rule for a specific frequency on @@ -1320,7 +1324,9 @@ extern void wiphy_apply_custom_regulatory( * freq_in_rule_band() for our current definition of a band -- this is purely * subjective and right now its 802.11 specific. */ -extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, +extern int freq_reg_info(struct wiphy *wiphy, + u32 center_freq, + u32 desired_bw_khz, const struct ieee80211_reg_rule **reg_rule); /* diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 537af62ec42..d897528ee7f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -48,12 +48,6 @@ static struct regulatory_request *last_request; /* To trigger userspace events */ static struct platform_device *reg_pdev; -/* Keep the ordering from large to small */ -static u32 supported_bandwidths[] = { - MHZ_TO_KHZ(40), - MHZ_TO_KHZ(20), -}; - /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no @@ -435,19 +429,20 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) return true; } -/* Returns value in KHz */ -static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, - u32 freq) +static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, + u32 center_freq_khz, + u32 bw_khz) { - unsigned int i; - for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) { - u32 start_freq_khz = freq - supported_bandwidths[i]/2; - u32 end_freq_khz = freq + supported_bandwidths[i]/2; - if (start_freq_khz >= freq_range->start_freq_khz && - end_freq_khz <= freq_range->end_freq_khz) - return supported_bandwidths[i]; - } - return 0; + u32 start_freq_khz, end_freq_khz; + + start_freq_khz = center_freq_khz - (bw_khz/2); + end_freq_khz = center_freq_khz + (bw_khz/2); + + if (start_freq_khz >= freq_range->start_freq_khz && + end_freq_khz <= freq_range->end_freq_khz) + return true; + + return false; } /** @@ -847,14 +842,17 @@ static u32 map_regdom_flags(u32 rd_flags) static int freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, - u32 *bandwidth, + u32 desired_bw_khz, const struct ieee80211_reg_rule **reg_rule, const struct ieee80211_regdomain *custom_regd) { int i; bool band_rule_found = false; const struct ieee80211_regdomain *regd; - u32 max_bandwidth = 0; + bool bw_fits = false; + + if (!desired_bw_khz) + desired_bw_khz = MHZ_TO_KHZ(20); regd = custom_regd ? custom_regd : cfg80211_regdomain; @@ -887,38 +885,54 @@ static int freq_reg_info_regd(struct wiphy *wiphy, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - max_bandwidth = freq_max_bandwidth(fr, center_freq); + bw_fits = reg_does_bw_fit(fr, + center_freq, + desired_bw_khz); - if (max_bandwidth && *bandwidth <= max_bandwidth) { + if (band_rule_found && bw_fits) { *reg_rule = rr; - *bandwidth = max_bandwidth; - break; + return 0; } } if (!band_rule_found) return -ERANGE; - return !max_bandwidth; + return -EINVAL; } EXPORT_SYMBOL(freq_reg_info); -int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, - const struct ieee80211_reg_rule **reg_rule) +int freq_reg_info(struct wiphy *wiphy, + u32 center_freq, + u32 desired_bw_khz, + const struct ieee80211_reg_rule **reg_rule) { assert_cfg80211_lock(); - return freq_reg_info_regd(wiphy, center_freq, - bandwidth, reg_rule, NULL); + return freq_reg_info_regd(wiphy, + center_freq, + desired_bw_khz, + reg_rule, + NULL); } +/* + * Note that right now we assume the desired channel bandwidth + * is always 20 MHz for each individual channel (HT40 uses 20 MHz + * per channel, the primary and the extension channel). To support + * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a + * new ieee80211_channel.target_bw and re run the regulatory check + * on the wiphy with the target_bw specified. Then we can simply use + * that below for the desired_bw_khz below. + */ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, unsigned int chan_idx) { int r; - u32 flags; - u32 max_bandwidth = 0; + u32 flags, bw_flags = 0; + u32 desired_bw_khz = MHZ_TO_KHZ(20); const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; + const struct ieee80211_freq_range *freq_range = NULL; struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; struct wiphy *request_wiphy = NULL; @@ -933,8 +947,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, flags = chan->orig_flags; - r = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq), - &max_bandwidth, ®_rule); + r = freq_reg_info(wiphy, + MHZ_TO_KHZ(chan->center_freq), + desired_bw_khz, + ®_rule); if (r) { /* @@ -977,6 +993,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, } power_rule = ®_rule->power_rule; + freq_range = ®_rule->freq_range; + + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) + bw_flags = IEEE80211_CHAN_NO_HT40; if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && @@ -987,19 +1007,19 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, * settings */ chan->flags = chan->orig_flags = - map_regdom_flags(reg_rule->flags); + map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = chan->orig_mag = (int) MBI_TO_DBI(power_rule->max_antenna_gain); - chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); + chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); chan->max_power = chan->orig_mpwr = (int) MBM_TO_DBM(power_rule->max_eirp); return; } - chan->flags = flags | map_regdom_flags(reg_rule->flags); + chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); - chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); + chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); if (chan->orig_mpwr) chan->max_power = min(chan->orig_mpwr, (int) MBM_TO_DBM(power_rule->max_eirp)); @@ -1156,6 +1176,93 @@ static void reg_process_beacons(struct wiphy *wiphy) wiphy_update_beacon_reg(wiphy); } +static bool is_ht40_not_allowed(struct ieee80211_channel *chan) +{ + if (!chan) + return true; + if (chan->flags & IEEE80211_CHAN_DISABLED) + return true; + /* This would happen when regulatory rules disallow HT40 completely */ + if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40))) + return true; + return false; +} + +static void reg_process_ht_flags_channel(struct wiphy *wiphy, + enum ieee80211_band band, + unsigned int chan_idx) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_channel *channel; + struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; + unsigned int i; + + assert_cfg80211_lock(); + + sband = wiphy->bands[band]; + BUG_ON(chan_idx >= sband->n_channels); + channel = &sband->channels[chan_idx]; + + if (is_ht40_not_allowed(channel)) { + channel->flags |= IEEE80211_CHAN_NO_HT40; + return; + } + + /* + * We need to ensure the extension channels exist to + * be able to use HT40- or HT40+, this finds them (or not) + */ + for (i = 0; i < sband->n_channels; i++) { + struct ieee80211_channel *c = &sband->channels[i]; + if (c->center_freq == (channel->center_freq - 20)) + channel_before = c; + if (c->center_freq == (channel->center_freq + 20)) + channel_after = c; + } + + /* + * Please note that this assumes target bandwidth is 20 MHz, + * if that ever changes we also need to change the below logic + * to include that as well. + */ + if (is_ht40_not_allowed(channel_before)) + channel->flags |= IEEE80211_CHAN_NO_FAT_BELOW; + else + channel->flags &= ~IEEE80211_CHAN_NO_FAT_BELOW; + + if (is_ht40_not_allowed(channel_after)) + channel->flags |= IEEE80211_CHAN_NO_FAT_ABOVE; + else + channel->flags &= ~IEEE80211_CHAN_NO_FAT_ABOVE; +} + +static void reg_process_ht_flags_band(struct wiphy *wiphy, + enum ieee80211_band band) +{ + unsigned int i; + struct ieee80211_supported_band *sband; + + BUG_ON(!wiphy->bands[band]); + sband = wiphy->bands[band]; + + for (i = 0; i < sband->n_channels; i++) + reg_process_ht_flags_channel(wiphy, band, i); +} + +static void reg_process_ht_flags(struct wiphy *wiphy) +{ + enum ieee80211_band band; + + if (!wiphy) + return; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (wiphy->bands[band]) + reg_process_ht_flags_band(wiphy, band); + } + +} + void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { @@ -1169,6 +1276,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, } out: reg_process_beacons(wiphy); + reg_process_ht_flags(wiphy); if (wiphy->reg_notifier) wiphy->reg_notifier(wiphy, last_request); } @@ -1179,9 +1287,11 @@ static void handle_channel_custom(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { int r; - u32 max_bandwidth = 0; + u32 desired_bw_khz = MHZ_TO_KHZ(20); + u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; + const struct ieee80211_freq_range *freq_range = NULL; struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; @@ -1191,8 +1301,11 @@ static void handle_channel_custom(struct wiphy *wiphy, BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; - r = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq), - &max_bandwidth, ®_rule, regd); + r = freq_reg_info_regd(wiphy, + MHZ_TO_KHZ(chan->center_freq), + desired_bw_khz, + ®_rule, + regd); if (r) { chan->flags = IEEE80211_CHAN_DISABLED; @@ -1200,10 +1313,14 @@ static void handle_channel_custom(struct wiphy *wiphy, } power_rule = ®_rule->power_rule; + freq_range = ®_rule->freq_range; + + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) + bw_flags = IEEE80211_CHAN_NO_HT40; - chan->flags |= map_regdom_flags(reg_rule->flags); + chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); - chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); + chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -- cgit v1.2.3-70-g09d2 From 689da1b3b8b37ff41e79f3fb973c06cdfeef12e5 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 2 May 2009 00:37:18 -0400 Subject: wireless: rename IEEE80211_CHAN_NO_FAT_* to HT40-/+ This is more consistent with our nl80211 naming convention for HT40-/+. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 4 ++-- drivers/net/wireless/iwlwifi/iwl-eeprom.c | 8 ++++---- include/net/cfg80211.h | 10 +++++----- net/mac80211/mlme.c | 4 ++-- net/wireless/reg.c | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 5393fb3f452..6c1b171d92b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -603,10 +603,10 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv, if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) return !(ch_info->fat_extension_channel & - IEEE80211_CHAN_NO_FAT_ABOVE); + IEEE80211_CHAN_NO_HT40PLUS); else if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) return !(ch_info->fat_extension_channel & - IEEE80211_CHAN_NO_FAT_BELOW); + IEEE80211_CHAN_NO_HT40MINUS); return 0; } diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c index 401438aec19..b400bd510fc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -481,8 +481,8 @@ int iwl_init_channel_map(struct iwl_priv *priv) /* First write that fat is not enabled, and then enable * one by one */ ch_info->fat_extension_channel = - (IEEE80211_CHAN_NO_FAT_ABOVE | - IEEE80211_CHAN_NO_FAT_BELOW); + (IEEE80211_CHAN_NO_HT40PLUS | + IEEE80211_CHAN_NO_HT40MINUS); if (!(is_channel_valid(ch_info))) { IWL_DEBUG_INFO(priv, "Ch. %d Flags %x [%sGHz] - " @@ -561,7 +561,7 @@ int iwl_init_channel_map(struct iwl_priv *priv) fat_extension_chan = 0; else fat_extension_chan = - IEEE80211_CHAN_NO_FAT_BELOW; + IEEE80211_CHAN_NO_HT40MINUS; /* Set up driver's info for lower half */ iwl_set_fat_chan_info(priv, ieeeband, @@ -573,7 +573,7 @@ int iwl_init_channel_map(struct iwl_priv *priv) iwl_set_fat_chan_info(priv, ieeeband, (eeprom_ch_index[ch] + 4), &(eeprom_ch_info[ch]), - IEEE80211_CHAN_NO_FAT_ABOVE); + IEEE80211_CHAN_NO_HT40PLUS); } } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 282d58d52fc..7391ad10c8c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -56,9 +56,9 @@ enum ieee80211_band { * on this channel. * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. - * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel + * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. - * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel + * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel * is not permitted. */ enum ieee80211_channel_flags { @@ -66,12 +66,12 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_PASSIVE_SCAN = 1<<1, IEEE80211_CHAN_NO_IBSS = 1<<2, IEEE80211_CHAN_RADAR = 1<<3, - IEEE80211_CHAN_NO_FAT_ABOVE = 1<<4, - IEEE80211_CHAN_NO_FAT_BELOW = 1<<5, + IEEE80211_CHAN_NO_HT40PLUS = 1<<4, + IEEE80211_CHAN_NO_HT40MINUS = 1<<5, }; #define IEEE80211_CHAN_NO_HT40 \ - (IEEE80211_CHAN_NO_FAT_ABOVE | IEEE80211_CHAN_NO_FAT_BELOW) + (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) /** * struct ieee80211_channel - channel definition diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ae030688771..da582b643b9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -349,13 +349,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) { + if (flags & IEEE80211_CHAN_NO_HT40PLUS) { cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } break; case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - if (flags & IEEE80211_CHAN_NO_FAT_BELOW) { + if (flags & IEEE80211_CHAN_NO_HT40MINUS) { cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d897528ee7f..48db569d4c6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1226,14 +1226,14 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, * to include that as well. */ if (is_ht40_not_allowed(channel_before)) - channel->flags |= IEEE80211_CHAN_NO_FAT_BELOW; + channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else - channel->flags &= ~IEEE80211_CHAN_NO_FAT_BELOW; + channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; if (is_ht40_not_allowed(channel_after)) - channel->flags |= IEEE80211_CHAN_NO_FAT_ABOVE; + channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else - channel->flags &= ~IEEE80211_CHAN_NO_FAT_ABOVE; + channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; } static void reg_process_ht_flags_band(struct wiphy *wiphy, -- cgit v1.2.3-70-g09d2 From 3dcf670baf3d3a9bfc752e59d0b1a8d886230750 Mon Sep 17 00:00:00 2001 From: David Kilroy Date: Sat, 16 May 2009 23:13:46 +0100 Subject: cfg80211: mark ops as pointer to const This allows drivers to mark their cfg80211_ops tables const. Signed-off-by: David Kilroy Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- net/wireless/core.c | 2 +- net/wireless/core.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7391ad10c8c..09a823520e5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1073,7 +1073,7 @@ static inline const char *wiphy_name(struct wiphy *wiphy) * The returned pointer must be assigned to each netdev's * ieee80211_ptr for proper operation. */ -struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv); +struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv); /** * wiphy_register - register a wiphy with cfg80211 diff --git a/net/wireless/core.c b/net/wireless/core.c index b6ef5337c85..a5dbea1da47 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -229,7 +229,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, /* exported functions */ -struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) +struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { static int wiphy_counter; diff --git a/net/wireless/core.h b/net/wireless/core.h index 88f234c8f6b..9511c2be83d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -16,7 +16,7 @@ #include "reg.h" struct cfg80211_registered_device { - struct cfg80211_ops *ops; + const struct cfg80211_ops *ops; struct list_head list; /* we hold this mutex during any call so that * we cannot do multiple calls at once, and also -- cgit v1.2.3-70-g09d2 From cf5aa2f1f39d367d449df4c10de3d8e6f9c4443b Mon Sep 17 00:00:00 2001 From: David Kilroy Date: Sat, 16 May 2009 23:13:47 +0100 Subject: cfg80211: mark wiphy->privid as pointer to const This allows drivers to use a const pointer as the privid without a cast. Signed-off-by: David Kilroy Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 09a823520e5..81a3bfa4547 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -997,7 +997,7 @@ struct wiphy { * know whether it points to a wiphy your driver has registered * or not. Assign this to something global to your driver to * help determine whether you own this wiphy or not. */ - void *privid; + const void *privid; struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; -- cgit v1.2.3-70-g09d2 From e3da574a0ddd3e90a1e2b788b84b94bc17a75172 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 May 2009 19:56:36 +0200 Subject: cfg80211: allow wext to remove keys that don't exist Some applications using wireless extensions expect to be able to remove a key that doesn't exist. One example is wpa_supplicant which doesn't actually change behaviour when running into an error while trying to do that, but it prints an error message which users interpret as wpa_supplicant having problems. The safe thing to do is not change the behaviour of wireless extensions any more, so when the driver reports -ENOENT let the wext bridge code return success to userspace. To guarantee this, also document that drivers should return -ENOENT when the key doesn't exist. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 5 +++-- net/wireless/wext-compat.c | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 81a3bfa4547..389f1d20adf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -781,10 +781,11 @@ enum wiphy_params_flags { * @get_key: get information about the key with the given parameters. * @mac_addr will be %NULL when requesting information for a group * key. All pointers given to the @callback function need not be valid - * after it returns. + * after it returns. This function should return an error if it is + * not possible to retrieve the key, -ENOENT if it doesn't exist. * * @del_key: remove a key given the @mac_addr (%NULL for a group key) - * and @key_index + * and @key_index, return -ENOENT if the key doesn't exist. * * @set_default_key: set the default key on an interface * diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index f98090b90fb..711e00a0c9b 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -504,6 +504,13 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, else if (idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } + /* + * Applications using wireless extensions expect to be + * able to delete keys that don't exist, so allow that. + */ + if (err == -ENOENT) + return 0; + return err; } else { if (addr) -- cgit v1.2.3-70-g09d2 From cce4c77b87ce7e71a0f244a3dfb6ac1c3a1bc67e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 May 2009 10:39:34 +0200 Subject: mac80211: fix kernel-doc Moving information from config_interface to bss_info_changed removed struct ieee80211_if_conf which the documentation still refers to, additionally there's one kernel-doc description too much and one other missing, fix all this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/mac80211.tmpl | 1 - include/net/mac80211.h | 1 - net/mac80211/sta_info.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index fbeaffc1dcc..e3698666357 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl @@ -145,7 +145,6 @@ usage should require reading the full document. interface in STA mode at first! !Finclude/net/mac80211.h ieee80211_if_init_conf -!Finclude/net/mac80211.h ieee80211_if_conf diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d10ed1776fc..2d0610581ef 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -173,7 +173,6 @@ enum ieee80211_bss_change { * @timestamp: beacon timestamp * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp - * @ht: BSS's HT configuration * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index f53aa9dc6e9..49a1a1f7651 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -216,6 +216,7 @@ struct sta_ampdu_mlme { * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer + * @plink_timer_was_running: used by suspend/resume to restore timers * @debugfs: debug filesystem info * @sta: station information we share with the driver */ -- cgit v1.2.3-70-g09d2 From 04af8cf6f320031090ab6fa4600b912b0c18fb4b Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 20 May 2009 17:26:23 -0700 Subject: net: Remove unused parameter from fill method in fib_rules_ops. The netlink message header (struct nlmsghdr) is an unused parameter in fill method of fib_rules_ops struct. This patch removes this parameter from this method and fixes the places where this method is called. (include/net/fib_rules.h) Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 - net/core/fib_rules.c | 2 +- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b9b63395d00..ca4b2e84007 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -54,7 +54,6 @@ struct fib_rules_ops struct fib_rule_hdr *, struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, - struct nlmsghdr *, struct fib_rule_hdr *); u32 (*default_pref)(struct fib_rules_ops *ops); size_t (*nlmsg_payload)(struct fib_rule *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 17d9f497b79..bd309384f8b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -500,7 +500,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, rule->target); - if (ops->fill(rule, skb, nlh, frh) < 0) + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index a2690b12e03..72495f25269 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -192,7 +192,7 @@ unsigned dnet_addr_type(__le16 addr) } static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) + struct fib_rule_hdr *frh) { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 38904be4102..92d9d97ec5e 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -209,7 +209,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, } static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) + struct fib_rule_hdr *frh) { struct fib4_rule *rule4 = (struct fib4_rule *) rule; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e1a36dbb5a2..00a7a5e4ac9 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -211,7 +211,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, } static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) + struct fib_rule_hdr *frh) { struct fib6_rule *rule6 = (struct fib6_rule *) rule; -- cgit v1.2.3-70-g09d2 From d95ed9275edcb8995bda31005bb3f55e087626d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 19 May 2009 18:27:17 +0000 Subject: af_packet: Teach to listen for multiple unicast addresses. The the PACKET_ADD_MEMBERSHIP and the PACKET_DROP_MEMBERSHIP setsockopt calls for af_packet already has all of the infrastructure needed to subscribe to multiple mac addresses. All that is missing is a flag to say that the address we want to listen on is a unicast address. So introduce PACKET_MR_UNICAST and wire it up to dev_unicast_add and dev_unicast_delete. Additionally I noticed that errors from dev_mc_add were not propagated from packet_dev_mc so fix that. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 5b2badeb949..dea7d6b7cf9 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -145,5 +145,6 @@ struct packet_mreq #define PACKET_MR_MULTICAST 0 #define PACKET_MR_PROMISC 1 #define PACKET_MR_ALLMULTI 2 +#define PACKET_MR_UNICAST 3 #endif diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 766e6b41f7c..c7c5d524967 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1570,9 +1570,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, switch (i->type) { case PACKET_MR_MULTICAST: if (what > 0) - dev_mc_add(dev, i->addr, i->alen, 0); + return dev_mc_add(dev, i->addr, i->alen, 0); else - dev_mc_delete(dev, i->addr, i->alen, 0); + return dev_mc_delete(dev, i->addr, i->alen, 0); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); @@ -1580,6 +1580,12 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); break; + case PACKET_MR_UNICAST: + if (what > 0) + return dev_unicast_add(dev, i->addr, i->alen); + else + return dev_unicast_delete(dev, i->addr, i->alen); + break; default:; } return 0; -- cgit v1.2.3-70-g09d2 From b9fc745db833bbf74b4988493b8cd902a84c9415 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 19 May 2009 13:25:57 -0400 Subject: integrity: path_check update - Add support in ima_path_check() for integrity checking without incrementing the counts. (Required for nfsd.) - rename and export opencount_get to ima_counts_get - replace ima_shm_check calls with ima_counts_get - export ima_path_check Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- fs/exec.c | 5 ++-- fs/namei.c | 6 +++-- include/linux/ima.h | 11 +++++---- ipc/shm.c | 4 ++-- mm/shmem.c | 2 +- security/integrity/ima/ima_main.c | 48 +++++++++++++++++++++++---------------- 6 files changed, 46 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 998e856c307..618d6d1e2c5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -130,7 +130,8 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; - error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN); + error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, + IMA_COUNT_UPDATE); if (error) goto exit; @@ -680,7 +681,7 @@ struct file *open_exec(const char *name) err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); if (err) goto out_path_put; - err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN); + err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN, IMA_COUNT_UPDATE); if (err) goto out_path_put; diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4..b05a2b1dea6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -853,7 +853,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); if (!err) - err = ima_path_check(&nd->path, MAY_EXEC); + err = ima_path_check(&nd->path, MAY_EXEC, + IMA_COUNT_UPDATE); if (err) break; @@ -1515,7 +1516,8 @@ int may_open(struct path *path, int acc_mode, int flag) return error; error = ima_path_check(path, - acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), + IMA_COUNT_UPDATE); if (error) return error; /* diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e2aa45cb0c..b1b827d091a 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -13,14 +13,17 @@ #include struct linux_binprm; +#define IMA_COUNT_UPDATE 1 +#define IMA_COUNT_LEAVE 0 + #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct path *path, int mask, int update_counts); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); -extern void ima_shm_check(struct file *file); +extern void ima_counts_get(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct path *path, int mask, int update_counts) { return 0; } @@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -static inline void ima_shm_check(struct file *file) +static inline void ima_counts_get(struct file *file) { return; } diff --git a/ipc/shm.c b/ipc/shm.c index faa46da99eb..47b464229cd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -384,7 +384,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; - ima_shm_check(file); + ima_counts_get(file); id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (id < 0) { @@ -891,7 +891,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); if (!file) goto out_free; - ima_shm_check(file); + ima_counts_get(file); file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; diff --git a/mm/shmem.c b/mm/shmem.c index b25f95ce3db..a817f75f144 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2684,7 +2684,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) if (IS_ERR(file)) return PTR_ERR(file); - ima_shm_check(file); + ima_counts_get(file); if (vma->vm_file) fput(vma->vm_file); vma->vm_file = file; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index c4228c0eb2d..a2eb23310ea 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -125,6 +125,15 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, return rc; } +static void ima_update_counts(struct ima_iint_cache *iint, int mask) +{ + iint->opencount++; + if ((mask & MAY_WRITE) || (mask == 0)) + iint->writecount++; + else if (mask & (MAY_READ | MAY_EXEC)) + iint->readcount++; +} + /** * ima_path_check - based on policy, collect/store measurement. * @path: contains a pointer to the path to be measured @@ -143,7 +152,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, * Return 0 on success, an error code on failure. * (Based on the results of appraise_measurement().) */ -int ima_path_check(struct path *path, int mask) +int ima_path_check(struct path *path, int mask, int update_counts) { struct inode *inode = path->dentry->d_inode; struct ima_iint_cache *iint; @@ -157,11 +166,8 @@ int ima_path_check(struct path *path, int mask) return 0; mutex_lock(&iint->mutex); - iint->opencount++; - if ((mask & MAY_WRITE) || (mask == 0)) - iint->writecount++; - else if (mask & (MAY_READ | MAY_EXEC)) - iint->readcount++; + if (update_counts) + ima_update_counts(iint, mask); rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); if (rc < 0) @@ -197,6 +203,7 @@ out: kref_put(&iint->refcount, iint_free); return 0; } +EXPORT_SYMBOL_GPL(ima_path_check); static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) @@ -225,7 +232,16 @@ out: return rc; } -static void opencount_get(struct file *file) +/* + * ima_opens_get - increment file counts + * + * - for IPC shm and shmat file. + * - for nfsd exported files. + * + * Increment the counts for these files to prevent unnecessary + * imbalance messages. + */ +void ima_counts_get(struct file *file) { struct inode *inode = file->f_dentry->d_inode; struct ima_iint_cache *iint; @@ -237,8 +253,14 @@ static void opencount_get(struct file *file) return; mutex_lock(&iint->mutex); iint->opencount++; + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + iint->readcount++; + + if (file->f_mode & FMODE_WRITE) + iint->writecount++; mutex_unlock(&iint->mutex); } +EXPORT_SYMBOL_GPL(ima_counts_get); /** * ima_file_mmap - based on policy, collect/store measurement. @@ -263,18 +285,6 @@ int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -/* - * ima_shm_check - IPC shm and shmat create/fput a file - * - * Maintain the opencount for these files to prevent unnecessary - * imbalance messages. - */ -void ima_shm_check(struct file *file) -{ - opencount_get(file); - return; -} - /** * ima_bprm_check - based on policy, collect/store measurement. * @bprm: contains the linux_binprm structure -- cgit v1.2.3-70-g09d2 From 4ea7e38696c7e798c47ebbecadfd392f23f814f9 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 21 May 2009 07:36:08 +0000 Subject: dropmon: add ability to detect when hardware dropsrxpackets Patch to add the ability to detect drops in hardware interfaces via dropwatch. Adds a tracepoint to net_rx_action to signal everytime a napi instance is polled. The dropmon code then periodically checks to see if the rx_frames counter has changed, and if so, adds a drop notification to the netlink protocol, using the reserved all-0's vector to indicate the drop location was in hardware, rather than somewhere in the code. Signed-off-by: Neil Horman include/linux/net_dropmon.h | 8 ++ include/trace/napi.h | 11 +++ net/core/dev.c | 5 + net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++-- net/core/net-traces.c | 4 + net/core/netpoll.c | 2 6 files changed, 149 insertions(+), 5 deletions(-) Signed-off-by: David S. Miller --- include/linux/net_dropmon.h | 8 +++ include/trace/napi.h | 11 ++++ net/core/dev.c | 5 +- net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++-- net/core/net-traces.c | 4 ++ net/core/netpoll.c | 2 + 6 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 include/trace/napi.h (limited to 'include') diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a63..e8a8b5c50ed 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -2,12 +2,20 @@ #define __NET_DROPMON_H #include +#include struct net_dm_drop_point { __u8 pc[8]; __u32 count; }; +#define is_drop_point_hw(x) do {\ + int ____i, ____j;\ + for (____i = 0; ____i < 8; i ____i++)\ + ____j |= x[____i];\ + ____j;\ +} while (0) + #define NET_DM_CFG_VERSION 0 #define NET_DM_CFG_ALERT_COUNT 1 #define NET_DM_CFG_ALERT_DELAY 2 diff --git a/include/trace/napi.h b/include/trace/napi.h new file mode 100644 index 00000000000..a8989c4547e --- /dev/null +++ b/include/trace/napi.h @@ -0,0 +1,11 @@ +#ifndef _TRACE_NAPI_H_ +#define _TRACE_NAPI_H_ + +#include +#include + +DECLARE_TRACE(napi_poll, + TP_PROTO(struct napi_struct *napi), + TP_ARGS(napi)); + +#endif diff --git a/net/core/dev.c b/net/core/dev.c index 92ebeca2990..3942266d1f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -126,6 +126,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2771,8 +2772,10 @@ static void net_rx_action(struct softirq_action *h) * accidently calling ->poll() when NAPI is not scheduled. */ work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) + if (test_bit(NAPI_STATE_SCHED, &n->state)) { work = n->poll(n, weight); + trace_napi_poll(n); + } WARN_ON_ONCE(work > weight); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2797b711a97..a6c2ac2828f 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -22,8 +22,10 @@ #include #include #include +#include #include +#include #include @@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused); * and the work handle that will send up * netlink alerts */ -struct sock *dm_sock; +static int trace_state = TRACE_OFF; +static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -47,6 +50,13 @@ struct per_cpu_dm_data { struct timer_list send_timer; }; +struct dm_hw_stat_delta { + struct net_device *dev; + struct list_head list; + struct rcu_head rcu; + unsigned long last_drop_val; +}; + static struct genl_family net_drop_monitor_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, @@ -59,7 +69,8 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; - +static unsigned long dm_hw_check_delta = 2*HZ; +static LIST_HEAD(hw_stats_list); static void reset_per_cpu_data(struct per_cpu_dm_data *data) { @@ -115,7 +126,7 @@ static void sched_send_work(unsigned long unused) schedule_work(&data->dm_alert_work); } -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +static void trace_drop_common(struct sk_buff *skb, void *location) { struct net_dm_alert_msg *msg; struct nlmsghdr *nlh; @@ -159,24 +170,80 @@ out: return; } +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + trace_drop_common(skb, location); +} + +static void trace_napi_poll_hit(struct napi_struct *napi) +{ + struct dm_hw_stat_delta *new_stat; + + /* + * Ratelimit our check time to dm_hw_check_delta jiffies + */ + if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + if ((new_stat->dev == napi->dev) && + (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { + trace_drop_common(NULL, NULL); + new_stat->last_drop_val = napi->dev->stats.rx_dropped; + break; + } + } + rcu_read_unlock(); +} + + +static void free_dm_hw_stat(struct rcu_head *head) +{ + struct dm_hw_stat_delta *n; + n = container_of(head, struct dm_hw_stat_delta, rcu); + kfree(n); +} + static int set_all_monitor_traces(int state) { int rc = 0; + struct dm_hw_stat_delta *new_stat = NULL; + struct dm_hw_stat_delta *temp; + + spin_lock(&trace_state_lock); switch (state) { case TRACE_ON: rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + rc |= register_trace_napi_poll(trace_napi_poll_hit); break; case TRACE_OFF: rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + rc |= unregister_trace_napi_poll(trace_napi_poll_hit); tracepoint_synchronize_unregister(); + + /* + * Clean the device list + */ + list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { + if (new_stat->dev == NULL) { + list_del_rcu(&new_stat->list); + call_rcu(&new_stat->rcu, free_dm_hw_stat); + } + } break; default: rc = 1; break; } + if (!rc) + trace_state = state; + + spin_unlock(&trace_state_lock); + if (rc) return -EINPROGRESS; return rc; @@ -204,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb, return -ENOTSUPP; } +static int dropmon_net_event(struct notifier_block *ev_block, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct dm_hw_stat_delta *new_stat = NULL; + struct dm_hw_stat_delta *tmp; + + switch (event) { + case NETDEV_REGISTER: + new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); + + if (!new_stat) + goto out; + + new_stat->dev = dev; + INIT_RCU_HEAD(&new_stat->rcu); + spin_lock(&trace_state_lock); + list_add_rcu(&new_stat->list, &hw_stats_list); + spin_unlock(&trace_state_lock); + break; + case NETDEV_UNREGISTER: + spin_lock(&trace_state_lock); + list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { + if (new_stat->dev == dev) { + new_stat->dev = NULL; + if (trace_state == TRACE_OFF) { + list_del_rcu(&new_stat->list); + call_rcu(&new_stat->rcu, free_dm_hw_stat); + break; + } + } + } + spin_unlock(&trace_state_lock); + break; + } +out: + return NOTIFY_DONE; +} static struct genl_ops dropmon_ops[] = { { @@ -220,6 +325,10 @@ static struct genl_ops dropmon_ops[] = { }, }; +static struct notifier_block dropmon_net_notifier = { + .notifier_call = dropmon_net_event +}; + static int __init init_net_drop_monitor(void) { int cpu; @@ -243,12 +352,18 @@ static int __init init_net_drop_monitor(void) ret = genl_register_ops(&net_drop_monitor_family, &dropmon_ops[i]); if (ret) { - printk(KERN_CRIT "failed to register operation %d\n", + printk(KERN_CRIT "Failed to register operation %d\n", dropmon_ops[i].cmd); goto out_unreg; } } + rc = register_netdevice_notifier(&dropmon_net_notifier); + if (rc < 0) { + printk(KERN_CRIT "Failed to register netdevice notifier\n"); + goto out_unreg; + } + rc = 0; for_each_present_cpu(cpu) { @@ -259,6 +374,7 @@ static int __init init_net_drop_monitor(void) data->send_timer.data = cpu; data->send_timer.function = sched_send_work; } + goto out; out_unreg: diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb45665e4..b07b25bd2cd 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,3 +28,6 @@ DEFINE_TRACE(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); + +DEFINE_TRACE(napi_poll); +EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 64f51eec657..00b14e2c50e 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * We maintain a small pool of fully-sized skbs, to make sure the @@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo, set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + trace_napi_poll(napi->dev); clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); -- cgit v1.2.3-70-g09d2 From a7b11d738282337488ae158c975d76271ad43a98 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Thu, 21 May 2009 10:34:04 +0000 Subject: genetlink: Introduce genl_register_family_with_ops() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces genl_register_family_with_ops() that registers a genetlink family along with operations from a table. This is used to kill copy'n'paste occurrences in following patches. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 ++ net/netlink/genetlink.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 747c255d1df..1b0e3ee4ddd 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -88,6 +88,8 @@ struct genl_ops }; extern int genl_register_family(struct genl_family *family); +extern int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops); extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1d3dd30099d..eed4c6a8afc 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -383,6 +383,52 @@ errout: return err; } +/** + * genl_register_family_with_ops - register a generic netlink family + * @family: generic netlink family + * @ops: operations to be registered + * @n_ops: number of elements to register + * + * Registers the specified family and operations from the specified table. + * Only one family may be registered with the same family name or identifier. + * + * The family id may equal GENL_ID_GENERATE causing an unique id to + * be automatically generated and assigned. + * + * Either a doit or dumpit callback must be specified for every registered + * operation or the function will fail. Only one operation structure per + * command identifier may be registered. + * + * See include/net/genetlink.h for more documenation on the operations + * structure. + * + * This is equivalent to calling genl_register_family() followed by + * genl_register_ops() for every operation entry in the table taking + * care to unregister the family on error path. + * + * Return 0 on success or a negative error code. + */ +int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + int err, i; + + err = genl_register_family(family); + if (err) + return err; + + for (i = 0; i < n_ops; ++i, ++ops) { + err = genl_register_ops(family, ops); + if (err) + goto err_out; + } + return 0; +err_out: + genl_unregister_family(family); + return err; +} +EXPORT_SYMBOL(genl_register_family_with_ops); + /** * genl_unregister_family - unregister generic netlink family * @family: generic netlink family -- cgit v1.2.3-70-g09d2 From 5c82f56736e4c3a9eaf53c94366b056c8622d79e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 May 2009 09:41:30 +0100 Subject: AsoC: Make snd_soc_read() and snd_soc_write() functions Should be no impact on the generated code but it helps the compiler print clearer messages. Signed-off-by: Mark Brown --- include/sound/soc.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 2af3213df90..cf6111d72b1 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -214,10 +214,6 @@ void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count, struct snd_soc_jack_gpio *gpios); #endif -/* codec IO */ -#define snd_soc_read(codec, reg) codec->read(codec, reg) -#define snd_soc_write(codec, reg, value) codec->write(codec, reg, value) - /* codec register bit access */ int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned short reg, unsigned short mask, unsigned short value); @@ -507,6 +503,19 @@ struct soc_enum { void *dapm; }; +/* codec IO */ +static inline unsigned int snd_soc_read(struct snd_soc_codec *codec, + unsigned int reg) +{ + return codec->read(codec, reg); +} + +static inline unsigned int snd_soc_write(struct snd_soc_codec *codec, + unsigned int reg, unsigned int val) +{ + return codec->write(codec, reg, val); +} + #include #endif -- cgit v1.2.3-70-g09d2 From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 22 May 2009 14:17:31 +1000 Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct This replaces the struct perf_counter_context in the task_struct with a pointer to a dynamically allocated perf_counter_context struct. The main reason for doing is this is to allow us to transfer a perf_counter_context from one task to another when we do lazy PMU switching in a later patch. This has a few side-benefits: the task_struct becomes a little smaller, we save some memory because only tasks that have perf_counters attached get a perf_counter_context allocated for them, and we can remove the inclusion of in sched.h, meaning that we don't end up recompiling nearly everything whenever perf_counter.h changes. The perf_counter_context structures are reference-counted and freed when the last reference is dropped. A context can have references from its task and the counters on its task. Counters can outlive the task so it is possible that a context will be freed well after its task has exited. Contexts are allocated on fork if the parent had a context, or otherwise the first time that a per-task counter is created on a task. In the latter case, we set the context pointer in the task struct locklessly using an atomic compare-and-exchange operation in case we raced with some other task in creating a context for the subject task. This also removes the task pointer from the perf_counter struct. The task pointer was not used anywhere and would make it harder to move a context from one task to another. Anything that needed to know which task a counter was attached to was already using counter->ctx->task. The __perf_counter_init_context function moves up in perf_counter.c so that it can be called from find_get_context, and now initializes the refcount, but is otherwise unchanged. We were potentially calling list_del_counter twice: once from __perf_counter_exit_task when the task exits and once from __perf_counter_remove_from_context when the counter's fd gets closed. This adds a check in list_del_counter so it doesn't do anything if the counter has already been removed from the lists. Since perf_counter_task_sched_in doesn't do anything if the task doesn't have a context, and leaves cpuctx->task_ctx = NULL, this adds code to __perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in the case where the current task adds the first counter to itself and thus creates a context for itself. This also adds similar code to __perf_counter_enable to handle a similar situation which can arise when the counters have been disabled using prctl; that also leaves cpuctx->task_ctx = NULL. [ Impact: refactor counter context management to prepare for new feature ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 + include/linux/init_task.h | 13 --- include/linux/perf_counter.h | 4 +- include/linux/sched.h | 6 +- kernel/exit.c | 3 +- kernel/fork.c | 1 + kernel/perf_counter.c | 218 +++++++++++++++++++++++++++---------------- 7 files changed, 145 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e9021a90802..b4f64402a82 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -14,6 +14,7 @@ * Mikael Pettersson : PM converted to driver model. */ +#include #include #include #include diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 503afaa0afa..d87247d2641 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,18 +108,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ - .perf_counter_ctx.event_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ - .perf_counter_ctx.lock = \ - __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), -#else -# define INIT_PERF_COUNTERS(tsk) -#endif - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -183,7 +171,6 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f612941ef46..07130900546 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -449,7 +449,6 @@ struct perf_counter { struct hw_perf_counter hw; struct perf_counter_context *ctx; - struct task_struct *task; struct file *filp; struct perf_counter *parent; @@ -498,7 +497,6 @@ struct perf_counter { * Used as a container for task counters and CPU counters as well: */ struct perf_counter_context { -#ifdef CONFIG_PERF_COUNTERS /* * Protect the states of the counters in the list, * nr_active, and the list: @@ -516,6 +514,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + atomic_t refcount; struct task_struct *task; /* @@ -523,7 +522,6 @@ struct perf_counter_context { */ u64 time; u64 timestamp; -#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index ff59d123151..9714d450f41 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,7 +71,6 @@ struct sched_param { #include #include #include -#include #include #include #include @@ -99,6 +98,7 @@ struct robust_list_head; struct bio; struct bts_tracer; struct fs_struct; +struct perf_counter_context; /* * List of flags we want to share for kernel threads, @@ -1387,7 +1387,9 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif - struct perf_counter_context perf_counter_ctx; +#ifdef CONFIG_PERF_COUNTERS + struct perf_counter_context *perf_counter_ctxp; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; diff --git a/kernel/exit.c b/kernel/exit.c index f9dfedd94af..99ad4063ee4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); #ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); + WARN_ON_ONCE(tsk->perf_counter_ctxp); #endif trace_sched_process_free(tsk); put_task_struct(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index d32fef4d38e..e72a09f5355 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 08584c16049..06ea3eae886 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -97,6 +97,17 @@ void perf_enable(void) hw_perf_enable(); } +static void get_ctx(struct perf_counter_context *ctx) +{ + atomic_inc(&ctx->refcount); +} + +static void put_ctx(struct perf_counter_context *ctx) +{ + if (atomic_dec_and_test(&ctx->refcount)) + kfree(ctx); +} + static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -118,11 +129,17 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) ctx->nr_counters++; } +/* + * Remove a counter from the lists for its context. + * Must be called with counter->mutex and ctx->mutex held. + */ static void list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { struct perf_counter *sibling, *tmp; + if (list_empty(&counter->list_entry)) + return; ctx->nr_counters--; list_del_init(&counter->list_entry); @@ -216,8 +233,6 @@ static void __perf_counter_remove_from_context(void *info) counter_sched_out(counter, cpuctx, ctx); - counter->task = NULL; - list_del_counter(counter, ctx); if (!ctx->task) { @@ -279,7 +294,6 @@ retry: */ if (!list_empty(&counter->list_entry)) { list_del_counter(counter, ctx); - counter->task = NULL; } spin_unlock_irq(&ctx->lock); } @@ -568,11 +582,17 @@ static void __perf_install_in_context(void *info) * If this is a task context, we need to check whether it is * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. + * Or possibly this is the right context but it isn't + * on this cpu because it had no counters. */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; + if (ctx->task && cpuctx->task_ctx != ctx) { + if (cpuctx->task_ctx || ctx->task != current) + return; + cpuctx->task_ctx = ctx; + } spin_lock_irqsave(&ctx->lock, flags); + ctx->is_active = 1; update_context_time(ctx); /* @@ -653,7 +673,6 @@ perf_install_in_context(struct perf_counter_context *ctx, return; } - counter->task = task; retry: task_oncpu_function_call(task, __perf_install_in_context, counter); @@ -693,10 +712,14 @@ static void __perf_counter_enable(void *info) * If this is a per-task counter, need to check whether this * counter's task is the current task on this cpu. */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; + if (ctx->task && cpuctx->task_ctx != ctx) { + if (cpuctx->task_ctx || ctx->task != current) + return; + cpuctx->task_ctx = ctx; + } spin_lock_irqsave(&ctx->lock, flags); + ctx->is_active = 1; update_context_time(ctx); counter->prev_state = counter->state; @@ -852,10 +875,10 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter_context *ctx = task->perf_counter_ctxp; struct pt_regs *regs; - if (likely(!cpuctx->task_ctx)) + if (likely(!ctx || !cpuctx->task_ctx)) return; update_context_time(ctx); @@ -871,6 +894,8 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (!cpuctx->task_ctx) + return; __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; } @@ -969,8 +994,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, void perf_counter_task_sched_in(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter_context *ctx = task->perf_counter_ctxp; + if (likely(!ctx)) + return; __perf_counter_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } @@ -985,11 +1012,11 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) int perf_counter_task_disable(void) { struct task_struct *curr = current; - struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; unsigned long flags; - if (likely(!ctx->nr_counters)) + if (!ctx || !ctx->nr_counters) return 0; local_irq_save(flags); @@ -1020,12 +1047,12 @@ int perf_counter_task_disable(void) int perf_counter_task_enable(void) { struct task_struct *curr = current; - struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; unsigned long flags; int cpu; - if (likely(!ctx->nr_counters)) + if (!ctx || !ctx->nr_counters) return 0; local_irq_save(flags); @@ -1128,19 +1155,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) return; cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &curr->perf_counter_ctx; + ctx = curr->perf_counter_ctxp; perf_adjust_freq(&cpuctx->ctx); - perf_adjust_freq(ctx); + if (ctx) + perf_adjust_freq(ctx); perf_counter_cpu_sched_out(cpuctx); - __perf_counter_task_sched_out(ctx); + if (ctx) + __perf_counter_task_sched_out(ctx); rotate_ctx(&cpuctx->ctx); - rotate_ctx(ctx); + if (ctx) + rotate_ctx(ctx); perf_counter_cpu_sched_in(cpuctx, cpu); - perf_counter_task_sched_in(curr, cpu); + if (ctx) + perf_counter_task_sched_in(curr, cpu); } /* @@ -1176,6 +1207,22 @@ static u64 perf_counter_read(struct perf_counter *counter) return atomic64_read(&counter->count); } +/* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->lock); + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->counter_list); + INIT_LIST_HEAD(&ctx->event_list); + atomic_set(&ctx->refcount, 1); + ctx->task = task; +} + static void put_context(struct perf_counter_context *ctx) { if (ctx->task) @@ -1186,6 +1233,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; + struct perf_counter_context *tctx; struct task_struct *task; /* @@ -1225,15 +1273,36 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) if (!task) return ERR_PTR(-ESRCH); - ctx = &task->perf_counter_ctx; - ctx->task = task; - /* Reuse ptrace permission checks for now. */ if (!ptrace_may_access(task, PTRACE_MODE_READ)) { - put_context(ctx); + put_task_struct(task); return ERR_PTR(-EACCES); } + ctx = task->perf_counter_ctxp; + if (!ctx) { + ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + if (!ctx) { + put_task_struct(task); + return ERR_PTR(-ENOMEM); + } + __perf_counter_init_context(ctx, task); + /* + * Make sure other cpus see correct values for *ctx + * once task->perf_counter_ctxp is visible to them. + */ + smp_wmb(); + tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx); + if (tctx) { + /* + * We raced with some other task; use + * the context they set. + */ + kfree(ctx); + ctx = tctx; + } + } + return ctx; } @@ -1242,6 +1311,7 @@ static void free_counter_rcu(struct rcu_head *head) struct perf_counter *counter; counter = container_of(head, struct perf_counter, rcu_head); + put_ctx(counter->ctx); kfree(counter); } @@ -2247,7 +2317,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) perf_counter_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); + perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event); } void perf_counter_comm(struct task_struct *task) @@ -2256,7 +2326,9 @@ void perf_counter_comm(struct task_struct *task) if (!atomic_read(&nr_comm_tracking)) return; - + if (!current->perf_counter_ctxp) + return; + comm_event = (struct perf_comm_event){ .task = task, .event = { @@ -2372,7 +2444,7 @@ got_name: perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); + perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event); kfree(buf); } @@ -2384,6 +2456,8 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, if (!atomic_read(&nr_mmap_tracking)) return; + if (!current->perf_counter_ctxp) + return; mmap_event = (struct perf_mmap_event){ .file = file, @@ -2985,6 +3059,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->pmu = NULL; counter->ctx = ctx; + get_ctx(ctx); counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) @@ -3149,21 +3224,6 @@ err_put_context: goto out_fput; } -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->counter_list); - INIT_LIST_HEAD(&ctx->event_list); - ctx->task = task; -} - /* * inherit a counter from parent task to child task: */ @@ -3195,7 +3255,6 @@ inherit_counter(struct perf_counter *parent_counter, /* * Link it up in the child's context: */ - child_counter->task = child; add_counter_to_ctx(child_counter, child_ctx); child_counter->parent = parent_counter; @@ -3294,40 +3353,15 @@ __perf_counter_exit_task(struct task_struct *child, struct perf_counter *parent_counter; /* - * If we do not self-reap then we have to wait for the - * child task to unschedule (it will happen for sure), - * so that its counter is at its final count. (This - * condition triggers rarely - child tasks usually get - * off their CPU before the parent has a chance to - * get this far into the reaping action) + * Protect against concurrent operations on child_counter + * due its fd getting closed, etc. */ - if (child != current) { - wait_task_inactive(child, 0); - update_counter_times(child_counter); - list_del_counter(child_counter, child_ctx); - } else { - struct perf_cpu_context *cpuctx; - unsigned long flags; - - /* - * Disable and unlink this counter. - * - * Be careful about zapping the list - IRQ/NMI context - * could still be processing it: - */ - local_irq_save(flags); - perf_disable(); - - cpuctx = &__get_cpu_var(perf_cpu_context); + mutex_lock(&child_counter->mutex); - group_sched_out(child_counter, cpuctx, child_ctx); - update_counter_times(child_counter); + update_counter_times(child_counter); + list_del_counter(child_counter, child_ctx); - list_del_counter(child_counter, child_ctx); - - perf_enable(); - local_irq_restore(flags); - } + mutex_unlock(&child_counter->mutex); parent_counter = child_counter->parent; /* @@ -3346,19 +3380,29 @@ __perf_counter_exit_task(struct task_struct *child, * * Note: we may be running in child context, but the PID is not hashed * anymore so new counters will not be added. + * (XXX not sure that is true when we get called from flush_old_exec. + * -- paulus) */ void perf_counter_exit_task(struct task_struct *child) { struct perf_counter *child_counter, *tmp; struct perf_counter_context *child_ctx; + unsigned long flags; WARN_ON_ONCE(child != current); - child_ctx = &child->perf_counter_ctx; + child_ctx = child->perf_counter_ctxp; - if (likely(!child_ctx->nr_counters)) + if (likely(!child_ctx)) return; + local_irq_save(flags); + __perf_counter_task_sched_out(child_ctx); + child->perf_counter_ctxp = NULL; + local_irq_restore(flags); + + mutex_lock(&child_ctx->mutex); + again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) @@ -3371,6 +3415,10 @@ again: */ if (!list_empty(&child_ctx->counter_list)) goto again; + + mutex_unlock(&child_ctx->mutex); + + put_ctx(child_ctx); } /* @@ -3382,19 +3430,25 @@ void perf_counter_init_task(struct task_struct *child) struct perf_counter *counter; struct task_struct *parent = current; - child_ctx = &child->perf_counter_ctx; - parent_ctx = &parent->perf_counter_ctx; - - __perf_counter_init_context(child_ctx, child); + child->perf_counter_ctxp = NULL; /* * This is executed from the parent task context, so inherit - * counters that have been marked for cloning: + * counters that have been marked for cloning. + * First allocate and initialize a context for the child. */ - if (likely(!parent_ctx->nr_counters)) + child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + if (!child_ctx) + return; + + parent_ctx = parent->perf_counter_ctxp; + if (likely(!parent_ctx || !parent_ctx->nr_counters)) return; + __perf_counter_init_context(child_ctx, child); + child->perf_counter_ctxp = child_ctx; + /* * Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. -- cgit v1.2.3-70-g09d2 From 564c2b210add41df9a3a5aaa365c1d97cff6110d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 22 May 2009 14:27:22 +1000 Subject: perf_counter: Optimize context switch between identical inherited contexts When monitoring a process and its descendants with a set of inherited counters, we can often get the situation in a context switch where both the old (outgoing) and new (incoming) process have the same set of counters, and their values are ultimately going to be added together. In that situation it doesn't matter which set of counters are used to count the activity for the new process, so there is really no need to go through the process of reading the hardware counters and updating the old task's counters and then setting up the PMU for the new task. This optimizes the context switch in this situation. Instead of scheduling out the perf_counter_context for the old task and scheduling in the new context, we simply transfer the old context to the new task and keep using it without interruption. The new context gets transferred to the old task. This means that both tasks still have a valid perf_counter_context, so no special case is introduced when the old task gets scheduled in again, either on this CPU or another CPU. The equivalence of contexts is detected by keeping a pointer in each cloned context pointing to the context it was cloned from. To cope with the situation where a context is changed by adding or removing counters after it has been cloned, we also keep a generation number on each context which is incremented every time a context is changed. When a context is cloned we take a copy of the parent's generation number, and two cloned contexts are equivalent only if they have the same parent and the same generation number. In order that the parent context pointer remains valid (and is not reused), we increment the parent context's reference count for each context cloned from it. Since we don't have individual fds for the counters in a cloned context, the only thing that can make two clones of a given parent different after they have been cloned is enabling or disabling all counters with prctl. To account for this, we keep a count of the number of enabled counters in each context. Two contexts must have the same number of enabled counters to be considered equivalent. Here are some measurements of the context switch time as measured with the lat_ctx benchmark from lmbench, comparing the times obtained with and without this patch series: -----Unmodified----- With this patch series Counters: none 2 HW 4H+4S none 2 HW 4H+4S 2 processes: Average 3.44 6.45 11.24 3.12 3.39 3.60 St dev 0.04 0.04 0.13 0.05 0.17 0.19 8 processes: Average 6.45 8.79 14.00 5.57 6.23 7.57 St dev 1.27 1.04 0.88 1.42 1.46 1.42 32 processes: Average 5.56 8.43 13.78 5.28 5.55 7.15 St dev 0.41 0.47 0.53 0.54 0.57 0.81 The numbers are the mean and standard deviation of 20 runs of lat_ctx. The "none" columns are lat_ctx run directly without any counters. The "2 HW" columns are with lat_ctx run under perfstat, counting cycles and instructions. The "4H+4S" columns are lat_ctx run under perfstat with 4 hardware counters and 4 software counters (cycles, instructions, cache references, cache misses, task clock, context switch, cpu migrations, and page faults). [ Impact: performance optimization of counter context-switches ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 ++++- kernel/perf_counter.c | 109 +++++++++++++++++++++++++++++++++++++------ kernel/sched.c | 2 +- 3 files changed, 107 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 07130900546..4cae01a5045 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -513,6 +513,7 @@ struct perf_counter_context { struct list_head event_list; int nr_counters; int nr_active; + int nr_enabled; int is_active; atomic_t refcount; struct task_struct *task; @@ -522,6 +523,14 @@ struct perf_counter_context { */ u64 time; u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. + */ + struct perf_counter_context *parent_ctx; + u32 parent_gen; + u32 generation; }; /** @@ -552,7 +561,8 @@ extern int perf_max_counters; extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 06ea3eae886..c10055416de 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -104,8 +104,11 @@ static void get_ctx(struct perf_counter_context *ctx) static void put_ctx(struct perf_counter_context *ctx) { - if (atomic_dec_and_test(&ctx->refcount)) + if (atomic_dec_and_test(&ctx->refcount)) { + if (ctx->parent_ctx) + put_ctx(ctx->parent_ctx); kfree(ctx); + } } static void @@ -127,6 +130,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_rcu(&counter->event_entry, &ctx->event_list); ctx->nr_counters++; + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + ctx->nr_enabled++; } /* @@ -141,6 +146,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) if (list_empty(&counter->list_entry)) return; ctx->nr_counters--; + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + ctx->nr_enabled--; list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); @@ -203,6 +210,22 @@ group_sched_out(struct perf_counter *group_counter, cpuctx->exclusive = 0; } +/* + * Mark this context as not being a clone of another. + * Called when counters are added to or removed from this context. + * We also increment our generation number so that anything that + * was cloned from this context before this will not match anything + * cloned from this context after this. + */ +static void unclone_ctx(struct perf_counter_context *ctx) +{ + ++ctx->generation; + if (!ctx->parent_ctx) + return; + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; +} + /* * Cross CPU call to remove a performance counter * @@ -263,6 +286,7 @@ static void perf_counter_remove_from_context(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; struct task_struct *task = ctx->task; + unclone_ctx(ctx); if (!task) { /* * Per cpu counters are removed via an smp call and @@ -378,6 +402,7 @@ static void __perf_counter_disable(void *info) else counter_sched_out(counter, cpuctx, ctx); counter->state = PERF_COUNTER_STATE_OFF; + ctx->nr_enabled--; } spin_unlock_irqrestore(&ctx->lock, flags); @@ -419,6 +444,7 @@ static void perf_counter_disable(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_INACTIVE) { update_counter_times(counter); counter->state = PERF_COUNTER_STATE_OFF; + ctx->nr_enabled--; } spin_unlock_irq(&ctx->lock); @@ -727,6 +753,7 @@ static void __perf_counter_enable(void *info) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + ctx->nr_enabled++; /* * If the counter is in a group and isn't the group leader, @@ -817,6 +844,7 @@ static void perf_counter_enable(struct perf_counter *counter) counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + ctx->nr_enabled++; } out: spin_unlock_irq(&ctx->lock); @@ -861,6 +889,25 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, spin_unlock(&ctx->lock); } +/* + * Test whether two contexts are equivalent, i.e. whether they + * have both been cloned from the same version of the same context + * and they both have the same number of enabled counters. + * If the number of enabled counters is the same, then the set + * of enabled counters should be the same, because these are both + * inherited contexts, therefore we can't access individual counters + * in them directly with an fd; we can only enable/disable all + * counters via prctl, or enable/disable all counters in a family + * via ioctl, which will have the same effect on both contexts. + */ +static int context_equiv(struct perf_counter_context *ctx1, + struct perf_counter_context *ctx2) +{ + return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx + && ctx1->parent_gen == ctx2->parent_gen + && ctx1->nr_enabled == ctx2->nr_enabled; +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -872,10 +919,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, * accessing the counter control register. If a NMI hits, then it will * not restart the counter. */ -void perf_counter_task_sched_out(struct task_struct *task, int cpu) +void perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = task->perf_counter_ctxp; + struct perf_counter_context *next_ctx; struct pt_regs *regs; if (likely(!ctx || !cpuctx->task_ctx)) @@ -885,6 +934,16 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) regs = task_pt_regs(task); perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); + + next_ctx = next->perf_counter_ctxp; + if (next_ctx && context_equiv(ctx, next_ctx)) { + task->perf_counter_ctxp = next_ctx; + next->perf_counter_ctxp = ctx; + ctx->task = next; + next_ctx->task = task; + return; + } + __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; @@ -998,6 +1057,8 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) if (likely(!ctx)) return; + if (cpuctx->task_ctx == ctx) + return; __perf_counter_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } @@ -3252,6 +3313,16 @@ inherit_counter(struct perf_counter *parent_counter, if (IS_ERR(child_counter)) return child_counter; + /* + * Make the child state follow the state of the parent counter, + * not its hw_event.disabled bit. We hold the parent's mutex, + * so we won't race with perf_counter_{en,dis}able_family. + */ + if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + else + child_counter->state = PERF_COUNTER_STATE_OFF; + /* * Link it up in the child's context: */ @@ -3277,16 +3348,6 @@ inherit_counter(struct perf_counter *parent_counter, mutex_lock(&parent_counter->mutex); list_add_tail(&child_counter->child_list, &parent_counter->child_list); - /* - * Make the child state follow the state of the parent counter, - * not its hw_event.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en,dis}able_family. - */ - if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - else - child_counter->state = PERF_COUNTER_STATE_OFF; - mutex_unlock(&parent_counter->mutex); return child_counter; @@ -3429,6 +3490,7 @@ void perf_counter_init_task(struct task_struct *child) struct perf_counter_context *child_ctx, *parent_ctx; struct perf_counter *counter; struct task_struct *parent = current; + int inherited_all = 1; child->perf_counter_ctxp = NULL; @@ -3463,12 +3525,31 @@ void perf_counter_init_task(struct task_struct *child) if (counter != counter->group_leader) continue; - if (!counter->hw_event.inherit) + if (!counter->hw_event.inherit) { + inherited_all = 0; continue; + } if (inherit_group(counter, parent, - parent_ctx, child, child_ctx)) + parent_ctx, child, child_ctx)) { + inherited_all = 0; break; + } + } + + if (inherited_all) { + /* + * Mark the child context as a clone of the parent + * context, or of whatever the parent is a clone of. + */ + if (parent_ctx->parent_ctx) { + child_ctx->parent_ctx = parent_ctx->parent_ctx; + child_ctx->parent_gen = parent_ctx->parent_gen; + } else { + child_ctx->parent_ctx = parent_ctx; + child_ctx->parent_gen = parent_ctx->generation; + } + get_ctx(child_ctx->parent_ctx); } mutex_unlock(&parent_ctx->mutex); diff --git a/kernel/sched.c b/kernel/sched.c index 419a39d0988..4c0d58bce6b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5091,7 +5091,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_counter_task_sched_out(prev, cpu); + perf_counter_task_sched_out(prev, next, cpu); rq->nr_switches++; rq->curr = next; -- cgit v1.2.3-70-g09d2 From 910431c7f2e963017d767b29c80ae706421e569f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 May 2009 12:32:15 +0200 Subject: perf_counter: fix !PERF_COUNTERS build failure Update the !CONFIG_PERF_COUNTERS prototype too, for perf_counter_task_sched_out(). [ Impact: build fix ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4cae01a5045..2eedae8498d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -625,7 +625,8 @@ extern void perf_counter_init(void); static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void -perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } -- cgit v1.2.3-70-g09d2 From 86ed3669f068b514ab85ffd548456a342b3fb8d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 May 2009 15:01:19 +0100 Subject: ASoC: WM9081 mono DAC with integrated 2.6W class AB/D amplifier driver The WM9081 is designed to provide high power output at low distortion levels in space-constrained portable applications. Signed-off-by: Mark Brown --- include/sound/wm9081.h | 25 + sound/soc/codecs/Kconfig | 4 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/wm9081.c | 1532 +++++++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/wm9081.h | 787 +++++++++++++++++++++++ 5 files changed, 2350 insertions(+) create mode 100644 include/sound/wm9081.h create mode 100644 sound/soc/codecs/wm9081.c create mode 100644 sound/soc/codecs/wm9081.h (limited to 'include') diff --git a/include/sound/wm9081.h b/include/sound/wm9081.h new file mode 100644 index 00000000000..e173ddbf6bd --- /dev/null +++ b/include/sound/wm9081.h @@ -0,0 +1,25 @@ +/* + * linux/sound/wm9081.h -- Platform data for WM9081 + * + * Copyright 2009 Wolfson Microelectronics. PLC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_SND_WM_9081_H +#define __LINUX_SND_WM_9081_H + +struct wm9081_retune_mobile_setting { + const char *name; + unsigned int rate; + u16 config[20]; +}; + +struct wm9081_retune_mobile_config { + struct wm9081_retune_mobile_setting *configs; + int num_configs; +}; + +#endif diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 1c19ad54a9f..7f78b65fc4e 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -40,6 +40,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_WM8971 if I2C select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8990 if I2C + select SND_SOC_WM9081 if I2C select SND_SOC_WM9705 if SND_SOC_AC97_BUS select SND_SOC_WM9712 if SND_SOC_AC97_BUS select SND_SOC_WM9713 if SND_SOC_AC97_BUS @@ -156,6 +157,9 @@ config SND_SOC_WM8988 config SND_SOC_WM8990 tristate +config SND_SOC_WM9081 + tristate + config SND_SOC_WM9705 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 3d31b6bea83..70c55fa2c43 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -28,6 +28,7 @@ snd-soc-wm8960-objs := wm8960.o snd-soc-wm8971-objs := wm8971.o snd-soc-wm8988-objs := wm8988.o snd-soc-wm8990-objs := wm8990.o +snd-soc-wm9081-objs := wm9081.o snd-soc-wm9705-objs := wm9705.o snd-soc-wm9712-objs := wm9712.o snd-soc-wm9713-objs := wm9713.o @@ -62,6 +63,7 @@ obj-$(CONFIG_SND_SOC_WM8940) += snd-soc-wm8940.o obj-$(CONFIG_SND_SOC_WM8960) += snd-soc-wm8960.o obj-$(CONFIG_SND_SOC_WM8988) += snd-soc-wm8988.o obj-$(CONFIG_SND_SOC_WM8990) += snd-soc-wm8990.o +obj-$(CONFIG_SND_SOC_WM9081) += snd-soc-wm9081.o obj-$(CONFIG_SND_SOC_WM9705) += snd-soc-wm9705.o obj-$(CONFIG_SND_SOC_WM9712) += snd-soc-wm9712.o obj-$(CONFIG_SND_SOC_WM9713) += snd-soc-wm9713.o diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c new file mode 100644 index 00000000000..83e3148c258 --- /dev/null +++ b/sound/soc/codecs/wm9081.c @@ -0,0 +1,1532 @@ +/* + * wm9081.c -- WM9081 ALSA SoC Audio driver + * + * Author: Mark Brown + * + * Copyright 2009 Wolfson Microelectronics plc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "wm9081.h" + +static u16 wm9081_reg_defaults[] = { + 0x0000, /* R0 - Software Reset */ + 0x0000, /* R1 */ + 0x00B9, /* R2 - Analogue Lineout */ + 0x00B9, /* R3 - Analogue Speaker PGA */ + 0x0001, /* R4 - VMID Control */ + 0x0068, /* R5 - Bias Control 1 */ + 0x0000, /* R6 */ + 0x0000, /* R7 - Analogue Mixer */ + 0x0000, /* R8 - Anti Pop Control */ + 0x01DB, /* R9 - Analogue Speaker 1 */ + 0x0018, /* R10 - Analogue Speaker 2 */ + 0x0180, /* R11 - Power Management */ + 0x0000, /* R12 - Clock Control 1 */ + 0x0038, /* R13 - Clock Control 2 */ + 0x4000, /* R14 - Clock Control 3 */ + 0x0000, /* R15 */ + 0x0000, /* R16 - FLL Control 1 */ + 0x0200, /* R17 - FLL Control 2 */ + 0x0000, /* R18 - FLL Control 3 */ + 0x0204, /* R19 - FLL Control 4 */ + 0x0000, /* R20 - FLL Control 5 */ + 0x0000, /* R21 */ + 0x0000, /* R22 - Audio Interface 1 */ + 0x0002, /* R23 - Audio Interface 2 */ + 0x0008, /* R24 - Audio Interface 3 */ + 0x0022, /* R25 - Audio Interface 4 */ + 0x0000, /* R26 - Interrupt Status */ + 0x0006, /* R27 - Interrupt Status Mask */ + 0x0000, /* R28 - Interrupt Polarity */ + 0x0000, /* R29 - Interrupt Control */ + 0x00C0, /* R30 - DAC Digital 1 */ + 0x0008, /* R31 - DAC Digital 2 */ + 0x09AF, /* R32 - DRC 1 */ + 0x4201, /* R33 - DRC 2 */ + 0x0000, /* R34 - DRC 3 */ + 0x0000, /* R35 - DRC 4 */ + 0x0000, /* R36 */ + 0x0000, /* R37 */ + 0x0000, /* R38 - Write Sequencer 1 */ + 0x0000, /* R39 - Write Sequencer 2 */ + 0x0002, /* R40 - MW Slave 1 */ + 0x0000, /* R41 */ + 0x0000, /* R42 - EQ 1 */ + 0x0000, /* R43 - EQ 2 */ + 0x0FCA, /* R44 - EQ 3 */ + 0x0400, /* R45 - EQ 4 */ + 0x00B8, /* R46 - EQ 5 */ + 0x1EB5, /* R47 - EQ 6 */ + 0xF145, /* R48 - EQ 7 */ + 0x0B75, /* R49 - EQ 8 */ + 0x01C5, /* R50 - EQ 9 */ + 0x169E, /* R51 - EQ 10 */ + 0xF829, /* R52 - EQ 11 */ + 0x07AD, /* R53 - EQ 12 */ + 0x1103, /* R54 - EQ 13 */ + 0x1C58, /* R55 - EQ 14 */ + 0xF373, /* R56 - EQ 15 */ + 0x0A54, /* R57 - EQ 16 */ + 0x0558, /* R58 - EQ 17 */ + 0x0564, /* R59 - EQ 18 */ + 0x0559, /* R60 - EQ 19 */ + 0x4000, /* R61 - EQ 20 */ +}; + +static struct { + int ratio; + int clk_sys_rate; +} clk_sys_rates[] = { + { 64, 0 }, + { 128, 1 }, + { 192, 2 }, + { 256, 3 }, + { 384, 4 }, + { 512, 5 }, + { 768, 6 }, + { 1024, 7 }, + { 1408, 8 }, + { 1536, 9 }, +}; + +static struct { + int rate; + int sample_rate; +} sample_rates[] = { + { 8000, 0 }, + { 11025, 1 }, + { 12000, 2 }, + { 16000, 3 }, + { 22050, 4 }, + { 24000, 5 }, + { 32000, 6 }, + { 44100, 7 }, + { 48000, 8 }, + { 88200, 9 }, + { 96000, 10 }, +}; + +static struct { + int div; /* *10 due to .5s */ + int bclk_div; +} bclk_divs[] = { + { 10, 0 }, + { 15, 1 }, + { 20, 2 }, + { 30, 3 }, + { 40, 4 }, + { 50, 5 }, + { 55, 6 }, + { 60, 7 }, + { 80, 8 }, + { 100, 9 }, + { 110, 10 }, + { 120, 11 }, + { 160, 12 }, + { 200, 13 }, + { 220, 14 }, + { 240, 15 }, + { 250, 16 }, + { 300, 17 }, + { 320, 18 }, + { 440, 19 }, + { 480, 20 }, +}; + +struct wm9081_priv { + struct snd_soc_codec codec; + u16 reg_cache[WM9081_MAX_REGISTER + 1]; + int sysclk_source; + int mclk_rate; + int sysclk_rate; + int fs; + int bclk; + int master; + int fll_fref; + int fll_fout; + struct wm9081_retune_mobile_config *retune; +}; + +static int wm9081_reg_is_volatile(int reg) +{ + switch (reg) { + default: + return 0; + } +} + +static unsigned int wm9081_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + BUG_ON(reg > WM9081_MAX_REGISTER); + return cache[reg]; +} + +static unsigned int wm9081_read_hw(struct snd_soc_codec *codec, u8 reg) +{ + struct i2c_msg xfer[2]; + u16 data; + int ret; + struct i2c_client *client = codec->control_data; + + BUG_ON(reg > WM9081_MAX_REGISTER); + + /* Write register */ + xfer[0].addr = client->addr; + xfer[0].flags = 0; + xfer[0].len = 1; + xfer[0].buf = ® + + /* Read data */ + xfer[1].addr = client->addr; + xfer[1].flags = I2C_M_RD; + xfer[1].len = 2; + xfer[1].buf = (u8 *)&data; + + ret = i2c_transfer(client->adapter, xfer, 2); + if (ret != 2) { + dev_err(&client->dev, "i2c_transfer() returned %d\n", ret); + return 0; + } + + return (data >> 8) | ((data & 0xff) << 8); +} + +static unsigned int wm9081_read(struct snd_soc_codec *codec, unsigned int reg) +{ + if (wm9081_reg_is_volatile(reg)) + return wm9081_read_hw(codec, reg); + else + return wm9081_read_reg_cache(codec, reg); +} + +static int wm9081_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + u16 *cache = codec->reg_cache; + u8 data[3]; + + BUG_ON(reg > WM9081_MAX_REGISTER); + + if (!wm9081_reg_is_volatile(reg)) + cache[reg] = value; + + data[0] = reg; + data[1] = value >> 8; + data[2] = value & 0x00ff; + + if (codec->hw_write(codec->control_data, data, 3) == 3) + return 0; + else + return -EIO; +} + +static int wm9081_reset(struct snd_soc_codec *codec) +{ + return wm9081_write(codec, WM9081_SOFTWARE_RESET, 0); +} + +static const DECLARE_TLV_DB_SCALE(drc_in_tlv, -4500, 75, 0); +static const DECLARE_TLV_DB_SCALE(drc_out_tlv, -2250, 75, 0); +static const DECLARE_TLV_DB_SCALE(drc_min_tlv, -1800, 600, 0); +static unsigned int drc_max_tlv[] = { + TLV_DB_RANGE_HEAD(4), + 0, 0, TLV_DB_SCALE_ITEM(1200, 0, 0), + 1, 1, TLV_DB_SCALE_ITEM(1800, 0, 0), + 2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0), + 3, 3, TLV_DB_SCALE_ITEM(3600, 0, 0), +}; +static const DECLARE_TLV_DB_SCALE(drc_qr_tlv, 1200, 600, 0); +static const DECLARE_TLV_DB_SCALE(drc_startup_tlv, -300, 50, 0); + +static const DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); + +static const DECLARE_TLV_DB_SCALE(in_tlv, -600, 600, 0); +static const DECLARE_TLV_DB_SCALE(dac_tlv, -7200, 75, 1); +static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0); + +static const char *drc_high_text[] = { + "1", + "1/2", + "1/4", + "1/8", + "1/16", + "0", +}; + +static const struct soc_enum drc_high = + SOC_ENUM_SINGLE(WM9081_DRC_3, 3, 6, drc_high_text); + +static const char *drc_low_text[] = { + "1", + "1/2", + "1/4", + "1/8", + "0", +}; + +static const struct soc_enum drc_low = + SOC_ENUM_SINGLE(WM9081_DRC_3, 0, 5, drc_low_text); + +static const char *drc_atk_text[] = { + "181us", + "181us", + "363us", + "726us", + "1.45ms", + "2.9ms", + "5.8ms", + "11.6ms", + "23.2ms", + "46.4ms", + "92.8ms", + "185.6ms", +}; + +static const struct soc_enum drc_atk = + SOC_ENUM_SINGLE(WM9081_DRC_2, 12, 12, drc_atk_text); + +static const char *drc_dcy_text[] = { + "186ms", + "372ms", + "743ms", + "1.49s", + "2.97s", + "5.94s", + "11.89s", + "23.78s", + "47.56s", +}; + +static const struct soc_enum drc_dcy = + SOC_ENUM_SINGLE(WM9081_DRC_2, 8, 9, drc_dcy_text); + +static const char *drc_qr_dcy_text[] = { + "0.725ms", + "1.45ms", + "5.8ms", +}; + +static const struct soc_enum drc_qr_dcy = + SOC_ENUM_SINGLE(WM9081_DRC_2, 4, 3, drc_qr_dcy_text); + +static const char *dac_deemph_text[] = { + "None", + "32kHz", + "44.1kHz", + "48kHz", +}; + +static const struct soc_enum dac_deemph = + SOC_ENUM_SINGLE(WM9081_DAC_DIGITAL_2, 1, 4, dac_deemph_text); + +static const char *speaker_mode_text[] = { + "Class D", + "Class AB", +}; + +static const struct soc_enum speaker_mode = + SOC_ENUM_SINGLE(WM9081_ANALOGUE_SPEAKER_2, 6, 2, speaker_mode_text); + +static int speaker_mode_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + unsigned int reg; + + reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2); + if (reg & WM9081_SPK_MODE) + ucontrol->value.integer.value[0] = 1; + else + ucontrol->value.integer.value[0] = 0; + + return 0; +} + +/* + * Stop any attempts to change speaker mode while the speaker is enabled. + * + * We also have some special anti-pop controls dependant on speaker + * mode which must be changed along with the mode. + */ +static int speaker_mode_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + unsigned int reg_pwr = wm9081_read(codec, WM9081_POWER_MANAGEMENT); + unsigned int reg2 = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2); + + /* Are we changing anything? */ + if (ucontrol->value.integer.value[0] == + ((reg2 & WM9081_SPK_MODE) != 0)) + return 0; + + /* Don't try to change modes while enabled */ + if (reg_pwr & WM9081_SPK_ENA) + return -EINVAL; + + if (ucontrol->value.integer.value[0]) { + /* Class AB */ + reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL); + reg2 |= WM9081_SPK_MODE; + } else { + /* Class D */ + reg2 |= WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL; + reg2 &= ~WM9081_SPK_MODE; + } + + wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_2, reg2); + + return 0; +} + +static const struct snd_kcontrol_new wm9081_snd_controls[] = { +SOC_SINGLE_TLV("IN1 Volume", WM9081_ANALOGUE_MIXER, 1, 1, 1, in_tlv), +SOC_SINGLE_TLV("IN2 Volume", WM9081_ANALOGUE_MIXER, 3, 1, 1, in_tlv), + +SOC_SINGLE_TLV("Playback Volume", WM9081_DAC_DIGITAL_1, 1, 96, 0, dac_tlv), + +SOC_SINGLE("LINEOUT Switch", WM9081_ANALOGUE_LINEOUT, 7, 1, 1), +SOC_SINGLE("LINEOUT ZC Switch", WM9081_ANALOGUE_LINEOUT, 6, 1, 0), +SOC_SINGLE_TLV("LINEOUT Volume", WM9081_ANALOGUE_LINEOUT, 0, 63, 0, out_tlv), + +SOC_SINGLE("DRC Switch", WM9081_DRC_1, 15, 1, 0), +SOC_ENUM("DRC High Slope", drc_high), +SOC_ENUM("DRC Low Slope", drc_low), +SOC_SINGLE_TLV("DRC Input Volume", WM9081_DRC_4, 5, 60, 1, drc_in_tlv), +SOC_SINGLE_TLV("DRC Output Volume", WM9081_DRC_4, 0, 30, 1, drc_out_tlv), +SOC_SINGLE_TLV("DRC Minimum Volume", WM9081_DRC_2, 2, 3, 1, drc_min_tlv), +SOC_SINGLE_TLV("DRC Maximum Volume", WM9081_DRC_2, 0, 3, 0, drc_max_tlv), +SOC_ENUM("DRC Attack", drc_atk), +SOC_ENUM("DRC Decay", drc_dcy), +SOC_SINGLE("DRC Quick Release Switch", WM9081_DRC_1, 2, 1, 0), +SOC_SINGLE_TLV("DRC Quick Release Volume", WM9081_DRC_2, 6, 3, 0, drc_qr_tlv), +SOC_ENUM("DRC Quick Release Decay", drc_qr_dcy), +SOC_SINGLE_TLV("DRC Startup Volume", WM9081_DRC_1, 6, 18, 0, drc_startup_tlv), + +SOC_SINGLE("EQ Switch", WM9081_EQ_1, 0, 1, 0), + +SOC_SINGLE("Speaker DC Volume", WM9081_ANALOGUE_SPEAKER_1, 3, 5, 0), +SOC_SINGLE("Speaker AC Volume", WM9081_ANALOGUE_SPEAKER_1, 0, 5, 0), +SOC_SINGLE("Speaker Switch", WM9081_ANALOGUE_SPEAKER_PGA, 7, 1, 1), +SOC_SINGLE("Speaker ZC Switch", WM9081_ANALOGUE_SPEAKER_PGA, 6, 1, 0), +SOC_SINGLE_TLV("Speaker Volume", WM9081_ANALOGUE_SPEAKER_PGA, 0, 63, 0, + out_tlv), +SOC_ENUM("DAC Deemphasis", dac_deemph), +SOC_ENUM_EXT("Speaker Mode", speaker_mode, speaker_mode_get, speaker_mode_put), +}; + +static const struct snd_kcontrol_new wm9081_eq_controls[] = { +SOC_SINGLE_TLV("EQ1 Volume", WM9081_EQ_1, 11, 24, 0, eq_tlv), +SOC_SINGLE_TLV("EQ2 Volume", WM9081_EQ_1, 6, 24, 0, eq_tlv), +SOC_SINGLE_TLV("EQ3 Volume", WM9081_EQ_1, 1, 24, 0, eq_tlv), +SOC_SINGLE_TLV("EQ4 Volume", WM9081_EQ_2, 11, 24, 0, eq_tlv), +SOC_SINGLE_TLV("EQ5 Volume", WM9081_EQ_2, 6, 24, 0, eq_tlv), +}; + +static const struct snd_kcontrol_new mixer[] = { +SOC_DAPM_SINGLE("IN1 Switch", WM9081_ANALOGUE_MIXER, 0, 1, 0), +SOC_DAPM_SINGLE("IN2 Switch", WM9081_ANALOGUE_MIXER, 2, 1, 0), +SOC_DAPM_SINGLE("Playback Switch", WM9081_ANALOGUE_MIXER, 4, 1, 0), +}; + +static int speaker_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + unsigned int reg = wm9081_read(codec, WM9081_POWER_MANAGEMENT); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + reg |= WM9081_SPK_ENA; + break; + + case SND_SOC_DAPM_PRE_PMD: + reg &= ~WM9081_SPK_ENA; + break; + } + + wm9081_write(codec, WM9081_POWER_MANAGEMENT, reg); + + return 0; +} + +struct _fll_div { + u16 fll_fratio; + u16 fll_outdiv; + u16 fll_clk_ref_div; + u16 n; + u16 k; +}; + +/* The size in bits of the FLL divide multiplied by 10 + * to allow rounding later */ +#define FIXED_FLL_SIZE ((1 << 16) * 10) + +static struct { + unsigned int min; + unsigned int max; + u16 fll_fratio; + int ratio; +} fll_fratios[] = { + { 0, 64000, 4, 16 }, + { 64000, 128000, 3, 8 }, + { 128000, 256000, 2, 4 }, + { 256000, 1000000, 1, 2 }, + { 1000000, 13500000, 0, 1 }, +}; + +static int fll_factors(struct _fll_div *fll_div, unsigned int Fref, + unsigned int Fout) +{ + u64 Kpart; + unsigned int K, Ndiv, Nmod, target; + unsigned int div; + int i; + + /* Fref must be <=13.5MHz */ + div = 1; + while ((Fref / div) > 13500000) { + div *= 2; + + if (div > 8) { + pr_err("Can't scale %dMHz input down to <=13.5MHz\n", + Fref); + return -EINVAL; + } + } + fll_div->fll_clk_ref_div = div / 2; + + pr_debug("Fref=%u Fout=%u\n", Fref, Fout); + + /* Apply the division for our remaining calculations */ + Fref /= div; + + /* Fvco should be 90-100MHz; don't check the upper bound */ + div = 0; + target = Fout * 2; + while (target < 90000000) { + div++; + target *= 2; + if (div > 7) { + pr_err("Unable to find FLL_OUTDIV for Fout=%uHz\n", + Fout); + return -EINVAL; + } + } + fll_div->fll_outdiv = div; + + pr_debug("Fvco=%dHz\n", target); + + /* Find an appropraite FLL_FRATIO and factor it out of the target */ + for (i = 0; i < ARRAY_SIZE(fll_fratios); i++) { + if (fll_fratios[i].min <= Fref && Fref <= fll_fratios[i].max) { + fll_div->fll_fratio = fll_fratios[i].fll_fratio; + target /= fll_fratios[i].ratio; + break; + } + } + if (i == ARRAY_SIZE(fll_fratios)) { + pr_err("Unable to find FLL_FRATIO for Fref=%uHz\n", Fref); + return -EINVAL; + } + + /* Now, calculate N.K */ + Ndiv = target / Fref; + + fll_div->n = Ndiv; + Nmod = target % Fref; + pr_debug("Nmod=%d\n", Nmod); + + /* Calculate fractional part - scale up so we can round. */ + Kpart = FIXED_FLL_SIZE * (long long)Nmod; + + do_div(Kpart, Fref); + + K = Kpart & 0xFFFFFFFF; + + if ((K % 10) >= 5) + K += 5; + + /* Move down to proper range now rounding is done */ + fll_div->k = K / 10; + + pr_debug("N=%x K=%x FLL_FRATIO=%x FLL_OUTDIV=%x FLL_CLK_REF_DIV=%x\n", + fll_div->n, fll_div->k, + fll_div->fll_fratio, fll_div->fll_outdiv, + fll_div->fll_clk_ref_div); + + return 0; +} + +static int wm9081_set_fll(struct snd_soc_codec *codec, int fll_id, + unsigned int Fref, unsigned int Fout) +{ + struct wm9081_priv *wm9081 = codec->private_data; + u16 reg1, reg4, reg5; + struct _fll_div fll_div; + int ret; + int clk_sys_reg; + + /* Any change? */ + if (Fref == wm9081->fll_fref && Fout == wm9081->fll_fout) + return 0; + + /* Disable the FLL */ + if (Fout == 0) { + dev_dbg(codec->dev, "FLL disabled\n"); + wm9081->fll_fref = 0; + wm9081->fll_fout = 0; + + return 0; + } + + ret = fll_factors(&fll_div, Fref, Fout); + if (ret != 0) + return ret; + + reg5 = wm9081_read(codec, WM9081_FLL_CONTROL_5); + reg5 &= ~WM9081_FLL_CLK_SRC_MASK; + + switch (fll_id) { + case WM9081_SYSCLK_FLL_MCLK: + reg5 |= 0x1; + break; + + default: + dev_err(codec->dev, "Unknown FLL ID %d\n", fll_id); + return -EINVAL; + } + + /* Disable CLK_SYS while we reconfigure */ + clk_sys_reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3); + if (clk_sys_reg & WM9081_CLK_SYS_ENA) + wm9081_write(codec, WM9081_CLOCK_CONTROL_3, + clk_sys_reg & ~WM9081_CLK_SYS_ENA); + + /* Any FLL configuration change requires that the FLL be + * disabled first. */ + reg1 = wm9081_read(codec, WM9081_FLL_CONTROL_1); + reg1 &= ~WM9081_FLL_ENA; + wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1); + + /* Apply the configuration */ + if (fll_div.k) + reg1 |= WM9081_FLL_FRAC_MASK; + else + reg1 &= ~WM9081_FLL_FRAC_MASK; + wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1); + + wm9081_write(codec, WM9081_FLL_CONTROL_2, + (fll_div.fll_outdiv << WM9081_FLL_OUTDIV_SHIFT) | + (fll_div.fll_fratio << WM9081_FLL_FRATIO_SHIFT)); + wm9081_write(codec, WM9081_FLL_CONTROL_3, fll_div.k); + + reg4 = wm9081_read(codec, WM9081_FLL_CONTROL_4); + reg4 &= ~WM9081_FLL_N_MASK; + reg4 |= fll_div.n << WM9081_FLL_N_SHIFT; + wm9081_write(codec, WM9081_FLL_CONTROL_4, reg4); + + reg5 &= ~WM9081_FLL_CLK_REF_DIV_MASK; + reg5 |= fll_div.fll_clk_ref_div << WM9081_FLL_CLK_REF_DIV_SHIFT; + wm9081_write(codec, WM9081_FLL_CONTROL_5, reg5); + + /* Enable the FLL */ + wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1 | WM9081_FLL_ENA); + + /* Then bring CLK_SYS up again if it was disabled */ + if (clk_sys_reg & WM9081_CLK_SYS_ENA) + wm9081_write(codec, WM9081_CLOCK_CONTROL_3, clk_sys_reg); + + dev_dbg(codec->dev, "FLL enabled at %dHz->%dHz\n", Fref, Fout); + + wm9081->fll_fref = Fref; + wm9081->fll_fout = Fout; + + return 0; +} + +static int configure_clock(struct snd_soc_codec *codec) +{ + struct wm9081_priv *wm9081 = codec->private_data; + int new_sysclk, i, target; + unsigned int reg; + int ret = 0; + int mclkdiv = 0; + int fll = 0; + + switch (wm9081->sysclk_source) { + case WM9081_SYSCLK_MCLK: + if (wm9081->mclk_rate > 12225000) { + mclkdiv = 1; + wm9081->sysclk_rate = wm9081->mclk_rate / 2; + } else { + wm9081->sysclk_rate = wm9081->mclk_rate; + } + wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK, 0, 0); + break; + + case WM9081_SYSCLK_FLL_MCLK: + /* If we have a sample rate calculate a CLK_SYS that + * gives us a suitable DAC configuration, plus BCLK. + * Ideally we would check to see if we can clock + * directly from MCLK and only use the FLL if this is + * not the case, though care must be taken with free + * running mode. + */ + if (wm9081->master && wm9081->bclk) { + /* Make sure we can generate CLK_SYS and BCLK + * and that we've got 3MHz for optimal + * performance. */ + for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) { + target = wm9081->fs * clk_sys_rates[i].ratio; + if (target >= wm9081->bclk && + target > 3000000) + new_sysclk = target; + } + } else if (wm9081->fs) { + for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) { + new_sysclk = clk_sys_rates[i].ratio + * wm9081->fs; + if (new_sysclk > 3000000) + break; + } + } else { + new_sysclk = 12288000; + } + + ret = wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK, + wm9081->mclk_rate, new_sysclk); + if (ret == 0) { + wm9081->sysclk_rate = new_sysclk; + + /* Switch SYSCLK over to FLL */ + fll = 1; + } else { + wm9081->sysclk_rate = wm9081->mclk_rate; + } + break; + + default: + return -EINVAL; + } + + reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_1); + if (mclkdiv) + reg |= WM9081_MCLKDIV2; + else + reg &= ~WM9081_MCLKDIV2; + wm9081_write(codec, WM9081_CLOCK_CONTROL_1, reg); + + reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3); + if (fll) + reg |= WM9081_CLK_SRC_SEL; + else + reg &= ~WM9081_CLK_SRC_SEL; + wm9081_write(codec, WM9081_CLOCK_CONTROL_3, reg); + + dev_dbg(codec->dev, "CLK_SYS is %dHz\n", wm9081->sysclk_rate); + + return ret; +} + +static int clk_sys_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct wm9081_priv *wm9081 = codec->private_data; + + /* This should be done on init() for bypass paths */ + switch (wm9081->sysclk_source) { + case WM9081_SYSCLK_MCLK: + dev_dbg(codec->dev, "Using %dHz MCLK\n", wm9081->mclk_rate); + break; + case WM9081_SYSCLK_FLL_MCLK: + dev_dbg(codec->dev, "Using %dHz MCLK with FLL\n", + wm9081->mclk_rate); + break; + default: + dev_err(codec->dev, "System clock not configured\n"); + return -EINVAL; + } + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + configure_clock(codec); + break; + + case SND_SOC_DAPM_POST_PMD: + /* Disable the FLL if it's running */ + wm9081_set_fll(codec, 0, 0, 0); + break; + } + + return 0; +} + +static const struct snd_soc_dapm_widget wm9081_dapm_widgets[] = { +SND_SOC_DAPM_INPUT("IN1"), +SND_SOC_DAPM_INPUT("IN2"), + +SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM9081_POWER_MANAGEMENT, 0, 0), + +SND_SOC_DAPM_MIXER_NAMED_CTL("Mixer", SND_SOC_NOPM, 0, 0, + mixer, ARRAY_SIZE(mixer)), + +SND_SOC_DAPM_PGA("LINEOUT PGA", WM9081_POWER_MANAGEMENT, 4, 0, NULL, 0), + +SND_SOC_DAPM_PGA_E("Speaker PGA", WM9081_POWER_MANAGEMENT, 2, 0, NULL, 0, + speaker_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + +SND_SOC_DAPM_OUTPUT("LINEOUT"), +SND_SOC_DAPM_OUTPUT("SPKN"), +SND_SOC_DAPM_OUTPUT("SPKP"), + +SND_SOC_DAPM_SUPPLY("CLK_SYS", WM9081_CLOCK_CONTROL_3, 0, 0, clk_sys_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), +SND_SOC_DAPM_SUPPLY("CLK_DSP", WM9081_CLOCK_CONTROL_3, 1, 0, NULL, 0), +SND_SOC_DAPM_SUPPLY("TOCLK", WM9081_CLOCK_CONTROL_3, 2, 0, NULL, 0), +}; + + +static const struct snd_soc_dapm_route audio_paths[] = { + { "DAC", NULL, "CLK_SYS" }, + { "DAC", NULL, "CLK_DSP" }, + + { "Mixer", "IN1 Switch", "IN1" }, + { "Mixer", "IN2 Switch", "IN2" }, + { "Mixer", "Playback Switch", "DAC" }, + + { "LINEOUT PGA", NULL, "Mixer" }, + { "LINEOUT PGA", NULL, "TOCLK" }, + { "LINEOUT PGA", NULL, "CLK_SYS" }, + + { "LINEOUT", NULL, "LINEOUT PGA" }, + + { "Speaker PGA", NULL, "Mixer" }, + { "Speaker PGA", NULL, "TOCLK" }, + { "Speaker PGA", NULL, "CLK_SYS" }, + + { "SPKN", NULL, "Speaker PGA" }, + { "SPKP", NULL, "Speaker PGA" }, +}; + +static int wm9081_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + u16 reg; + + switch (level) { + case SND_SOC_BIAS_ON: + break; + + case SND_SOC_BIAS_PREPARE: + /* VMID=2*40k */ + reg = wm9081_read(codec, WM9081_VMID_CONTROL); + reg &= ~WM9081_VMID_SEL_MASK; + reg |= 0x2; + wm9081_write(codec, WM9081_VMID_CONTROL, reg); + + /* Normal bias current */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg &= ~WM9081_STBY_BIAS_ENA; + wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg); + break; + + case SND_SOC_BIAS_STANDBY: + /* Initial cold start */ + if (codec->bias_level == SND_SOC_BIAS_OFF) { + /* Disable LINEOUT discharge */ + reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL); + reg &= ~WM9081_LINEOUT_DISCH; + wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg); + + /* Select startup bias source */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg |= WM9081_BIAS_SRC | WM9081_BIAS_ENA; + wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg); + + /* VMID 2*4k; Soft VMID ramp enable */ + reg = wm9081_read(codec, WM9081_VMID_CONTROL); + reg |= WM9081_VMID_RAMP | 0x6; + wm9081_write(codec, WM9081_VMID_CONTROL, reg); + + mdelay(100); + + /* Normal bias enable & soft start off */ + reg |= WM9081_BIAS_ENA; + reg &= ~WM9081_VMID_RAMP; + wm9081_write(codec, WM9081_VMID_CONTROL, reg); + + /* Standard bias source */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg &= ~WM9081_BIAS_SRC; + wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg); + } + + /* VMID 2*240k */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg &= ~WM9081_VMID_SEL_MASK; + reg |= 0x40; + wm9081_write(codec, WM9081_VMID_CONTROL, reg); + + /* Standby bias current on */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg |= WM9081_STBY_BIAS_ENA; + wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg); + break; + + case SND_SOC_BIAS_OFF: + /* Startup bias source */ + reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1); + reg |= WM9081_BIAS_SRC; + wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg); + + /* Disable VMID and biases with soft ramping */ + reg = wm9081_read(codec, WM9081_VMID_CONTROL); + reg &= ~(WM9081_VMID_SEL_MASK | WM9081_BIAS_ENA); + reg |= WM9081_VMID_RAMP; + wm9081_write(codec, WM9081_VMID_CONTROL, reg); + + /* Actively discharge LINEOUT */ + reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL); + reg |= WM9081_LINEOUT_DISCH; + wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg); + break; + } + + codec->bias_level = level; + + return 0; +} + +static int wm9081_set_dai_fmt(struct snd_soc_dai *dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = dai->codec; + struct wm9081_priv *wm9081 = codec->private_data; + unsigned int aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2); + + aif2 &= ~(WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV | + WM9081_BCLK_DIR | WM9081_LRCLK_DIR | WM9081_AIF_FMT_MASK); + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + wm9081->master = 0; + break; + case SND_SOC_DAIFMT_CBS_CFM: + aif2 |= WM9081_LRCLK_DIR; + wm9081->master = 1; + break; + case SND_SOC_DAIFMT_CBM_CFS: + aif2 |= WM9081_BCLK_DIR; + wm9081->master = 1; + break; + case SND_SOC_DAIFMT_CBM_CFM: + aif2 |= WM9081_LRCLK_DIR | WM9081_BCLK_DIR; + wm9081->master = 1; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_DSP_B: + aif2 |= WM9081_AIF_LRCLK_INV; + case SND_SOC_DAIFMT_DSP_A: + aif2 |= 0x3; + break; + case SND_SOC_DAIFMT_I2S: + aif2 |= 0x2; + break; + case SND_SOC_DAIFMT_RIGHT_J: + break; + case SND_SOC_DAIFMT_LEFT_J: + aif2 |= 0x1; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_DSP_A: + case SND_SOC_DAIFMT_DSP_B: + /* frame inversion not valid for DSP modes */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + aif2 |= WM9081_AIF_BCLK_INV; + break; + default: + return -EINVAL; + } + break; + + case SND_SOC_DAIFMT_I2S: + case SND_SOC_DAIFMT_RIGHT_J: + case SND_SOC_DAIFMT_LEFT_J: + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_IF: + aif2 |= WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV; + break; + case SND_SOC_DAIFMT_IB_NF: + aif2 |= WM9081_AIF_BCLK_INV; + break; + case SND_SOC_DAIFMT_NB_IF: + aif2 |= WM9081_AIF_LRCLK_INV; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2); + + return 0; +} + +static int wm9081_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct wm9081_priv *wm9081 = codec->private_data; + int ret, i, best, best_val, cur_val; + unsigned int clk_ctrl2, aif1, aif2, aif3, aif4; + + clk_ctrl2 = wm9081_read(codec, WM9081_CLOCK_CONTROL_2); + clk_ctrl2 &= ~(WM9081_CLK_SYS_RATE_MASK | WM9081_SAMPLE_RATE_MASK); + + aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1); + + aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2); + aif2 &= ~WM9081_AIF_WL_MASK; + + aif3 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_3); + aif3 &= ~WM9081_BCLK_DIV_MASK; + + aif4 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_4); + aif4 &= ~WM9081_LRCLK_RATE_MASK; + + /* What BCLK do we need? */ + wm9081->fs = params_rate(params); + wm9081->bclk = 2 * wm9081->fs; + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + wm9081->bclk *= 16; + break; + case SNDRV_PCM_FORMAT_S20_3LE: + wm9081->bclk *= 20; + aif2 |= 0x4; + break; + case SNDRV_PCM_FORMAT_S24_LE: + wm9081->bclk *= 24; + aif2 |= 0x8; + break; + case SNDRV_PCM_FORMAT_S32_LE: + wm9081->bclk *= 32; + aif2 |= 0xc; + break; + default: + return -EINVAL; + } + + if (aif1 & WM9081_AIFDAC_TDM_MODE_MASK) { + int slots = ((aif1 & WM9081_AIFDAC_TDM_MODE_MASK) >> + WM9081_AIFDAC_TDM_MODE_SHIFT) + 1; + wm9081->bclk *= slots; + } + + dev_dbg(codec->dev, "Target BCLK is %dHz\n", wm9081->bclk); + + ret = configure_clock(codec); + if (ret != 0) + return ret; + + /* Select nearest CLK_SYS_RATE */ + best = 0; + best_val = abs((wm9081->sysclk_rate / clk_sys_rates[0].ratio) + - wm9081->fs); + for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) { + cur_val = abs((wm9081->sysclk_rate / + clk_sys_rates[i].ratio) - wm9081->fs);; + if (cur_val < best_val) { + best = i; + best_val = cur_val; + } + } + dev_dbg(codec->dev, "Selected CLK_SYS_RATIO of %d\n", + clk_sys_rates[best].ratio); + clk_ctrl2 |= (clk_sys_rates[best].clk_sys_rate + << WM9081_CLK_SYS_RATE_SHIFT); + + /* SAMPLE_RATE */ + best = 0; + best_val = abs(wm9081->fs - sample_rates[0].rate); + for (i = 1; i < ARRAY_SIZE(sample_rates); i++) { + /* Closest match */ + cur_val = abs(wm9081->fs - sample_rates[i].rate); + if (cur_val < best_val) { + best = i; + best_val = cur_val; + } + } + dev_dbg(codec->dev, "Selected SAMPLE_RATE of %dHz\n", + sample_rates[best].rate); + clk_ctrl2 |= (sample_rates[i].sample_rate << WM9081_SAMPLE_RATE_SHIFT); + + /* BCLK_DIV */ + best = 0; + best_val = INT_MAX; + for (i = 0; i < ARRAY_SIZE(bclk_divs); i++) { + cur_val = ((wm9081->sysclk_rate * 10) / bclk_divs[i].div) + - wm9081->bclk; + if (cur_val < 0) /* Table is sorted */ + break; + if (cur_val < best_val) { + best = i; + best_val = cur_val; + } + } + wm9081->bclk = (wm9081->sysclk_rate * 10) / bclk_divs[best].div; + dev_dbg(codec->dev, "Selected BCLK_DIV of %d for %dHz BCLK\n", + bclk_divs[best].div, wm9081->bclk); + aif3 |= bclk_divs[best].bclk_div; + + /* LRCLK is a simple fraction of BCLK */ + dev_dbg(codec->dev, "LRCLK_RATE is %d\n", wm9081->bclk / wm9081->fs); + aif4 |= wm9081->bclk / wm9081->fs; + + /* Apply a ReTune Mobile configuration if it's in use */ + if (wm9081->retune) { + struct wm9081_retune_mobile_config *retune = wm9081->retune; + struct wm9081_retune_mobile_setting *s; + int eq1; + + best = 0; + best_val = abs(retune->configs[0].rate - wm9081->fs); + for (i = 0; i < retune->num_configs; i++) { + cur_val = abs(retune->configs[i].rate - wm9081->fs); + if (cur_val < best_val) { + best_val = cur_val; + best = i; + } + } + s = &retune->configs[best]; + + dev_dbg(codec->dev, "ReTune Mobile %s tuned for %dHz\n", + s->name, s->rate); + + /* If the EQ is enabled then disable it while we write out */ + eq1 = wm9081_read(codec, WM9081_EQ_1) & WM9081_EQ_ENA; + if (eq1 & WM9081_EQ_ENA) + wm9081_write(codec, WM9081_EQ_1, 0); + + /* Write out the other values */ + for (i = 1; i < ARRAY_SIZE(s->config); i++) + wm9081_write(codec, WM9081_EQ_1 + i, s->config[i]); + + eq1 |= (s->config[0] & ~WM9081_EQ_ENA); + wm9081_write(codec, WM9081_EQ_1, eq1); + } + + wm9081_write(codec, WM9081_CLOCK_CONTROL_2, clk_ctrl2); + wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2); + wm9081_write(codec, WM9081_AUDIO_INTERFACE_3, aif3); + wm9081_write(codec, WM9081_AUDIO_INTERFACE_4, aif4); + + return 0; +} + +static int wm9081_digital_mute(struct snd_soc_dai *codec_dai, int mute) +{ + struct snd_soc_codec *codec = codec_dai->codec; + unsigned int reg; + + reg = wm9081_read(codec, WM9081_DAC_DIGITAL_2); + + if (mute) + reg |= WM9081_DAC_MUTE; + else + reg &= ~WM9081_DAC_MUTE; + + wm9081_write(codec, WM9081_DAC_DIGITAL_2, reg); + + return 0; +} + +static int wm9081_set_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct wm9081_priv *wm9081 = codec->private_data; + + switch (clk_id) { + case WM9081_SYSCLK_MCLK: + case WM9081_SYSCLK_FLL_MCLK: + wm9081->sysclk_source = clk_id; + wm9081->mclk_rate = freq; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int wm9081_set_tdm_slot(struct snd_soc_dai *dai, + unsigned int mask, int slots) +{ + struct snd_soc_codec *codec = dai->codec; + unsigned int aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1); + + aif1 &= ~(WM9081_AIFDAC_TDM_SLOT_MASK | WM9081_AIFDAC_TDM_MODE_MASK); + + if (slots < 1 || slots > 4) + return -EINVAL; + + aif1 |= (slots - 1) << WM9081_AIFDAC_TDM_MODE_SHIFT; + + switch (mask) { + case 1: + break; + case 2: + aif1 |= 0x10; + break; + case 4: + aif1 |= 0x20; + break; + case 8: + aif1 |= 0x30; + break; + default: + return -EINVAL; + } + + wm9081_write(codec, WM9081_AUDIO_INTERFACE_1, aif1); + + return 0; +} + +#define WM9081_RATES SNDRV_PCM_RATE_8000_96000 + +#define WM9081_FORMATS \ + (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +static struct snd_soc_dai_ops wm9081_dai_ops = { + .hw_params = wm9081_hw_params, + .set_sysclk = wm9081_set_sysclk, + .set_fmt = wm9081_set_dai_fmt, + .digital_mute = wm9081_digital_mute, + .set_tdm_slot = wm9081_set_tdm_slot, +}; + +/* We report two channels because the CODEC processes a stereo signal, even + * though it is only capable of handling a mono output. + */ +struct snd_soc_dai wm9081_dai = { + .name = "WM9081", + .playback = { + .stream_name = "HiFi Playback", + .channels_min = 1, + .channels_max = 2, + .rates = WM9081_RATES, + .formats = WM9081_FORMATS, + }, + .ops = &wm9081_dai_ops, +}; +EXPORT_SYMBOL_GPL(wm9081_dai); + + +static struct snd_soc_codec *wm9081_codec; + +static int wm9081_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + struct wm9081_priv *wm9081; + int ret = 0; + + if (wm9081_codec == NULL) { + dev_err(&pdev->dev, "Codec device not registered\n"); + return -ENODEV; + } + + socdev->card->codec = wm9081_codec; + codec = wm9081_codec; + wm9081 = codec->private_data; + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(codec->dev, "failed to create pcms: %d\n", ret); + goto pcm_err; + } + + snd_soc_add_controls(codec, wm9081_snd_controls, + ARRAY_SIZE(wm9081_snd_controls)); + if (!wm9081->retune) { + dev_dbg(codec->dev, + "No ReTune Mobile data, using normal EQ\n"); + snd_soc_add_controls(codec, wm9081_eq_controls, + ARRAY_SIZE(wm9081_eq_controls)); + } + + snd_soc_dapm_new_controls(codec, wm9081_dapm_widgets, + ARRAY_SIZE(wm9081_dapm_widgets)); + snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths)); + snd_soc_dapm_new_widgets(codec); + + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(codec->dev, "failed to register card: %d\n", ret); + goto card_err; + } + + return ret; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + return ret; +} + +static int wm9081_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + return 0; +} + +#ifdef CONFIG_PM +static int wm9081_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + wm9081_set_bias_level(codec, SND_SOC_BIAS_OFF); + + return 0; +} + +static int wm9081_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + u16 *reg_cache = codec->reg_cache; + int i; + + for (i = 0; i < codec->reg_cache_size; i++) { + if (i == WM9081_SOFTWARE_RESET) + continue; + + wm9081_write(codec, i, reg_cache[i]); + } + + wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + return 0; +} +#else +#define wm9081_suspend NULL +#define wm9081_resume NULL +#endif + +struct snd_soc_codec_device soc_codec_dev_wm9081 = { + .probe = wm9081_probe, + .remove = wm9081_remove, + .suspend = wm9081_suspend, + .resume = wm9081_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_wm9081); + +static int wm9081_register(struct wm9081_priv *wm9081) +{ + struct snd_soc_codec *codec = &wm9081->codec; + int ret; + u16 reg; + + if (wm9081_codec) { + dev_err(codec->dev, "Another WM9081 is registered\n"); + ret = -EINVAL; + goto err; + } + + mutex_init(&codec->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->private_data = wm9081; + codec->name = "WM9081"; + codec->owner = THIS_MODULE; + codec->read = wm9081_read; + codec->write = wm9081_write; + codec->dai = &wm9081_dai; + codec->num_dai = 1; + codec->reg_cache_size = ARRAY_SIZE(wm9081->reg_cache); + codec->reg_cache = &wm9081->reg_cache; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = wm9081_set_bias_level; + + memcpy(codec->reg_cache, wm9081_reg_defaults, + sizeof(wm9081_reg_defaults)); + + reg = wm9081_read_hw(codec, WM9081_SOFTWARE_RESET); + if (reg != 0x9081) { + dev_err(codec->dev, "Device is not a WM9081: ID=0x%x\n", reg); + ret = -EINVAL; + goto err; + } + + ret = wm9081_reset(codec); + if (ret < 0) { + dev_err(codec->dev, "Failed to issue reset\n"); + return ret; + } + + wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + /* Enable zero cross by default */ + reg = wm9081_read(codec, WM9081_ANALOGUE_LINEOUT); + wm9081_write(codec, WM9081_ANALOGUE_LINEOUT, reg | WM9081_LINEOUTZC); + reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_PGA); + wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_PGA, + reg | WM9081_SPKPGAZC); + + wm9081_dai.dev = codec->dev; + + wm9081_codec = codec; + + ret = snd_soc_register_codec(codec); + if (ret != 0) { + dev_err(codec->dev, "Failed to register codec: %d\n", ret); + return ret; + } + + ret = snd_soc_register_dai(&wm9081_dai); + if (ret != 0) { + dev_err(codec->dev, "Failed to register DAI: %d\n", ret); + snd_soc_unregister_codec(codec); + return ret; + } + + return 0; + +err: + kfree(wm9081); + return ret; +} + +static void wm9081_unregister(struct wm9081_priv *wm9081) +{ + wm9081_set_bias_level(&wm9081->codec, SND_SOC_BIAS_OFF); + snd_soc_unregister_dai(&wm9081_dai); + snd_soc_unregister_codec(&wm9081->codec); + kfree(wm9081); + wm9081_codec = NULL; +} + +static __devinit int wm9081_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm9081_priv *wm9081; + struct snd_soc_codec *codec; + + wm9081 = kzalloc(sizeof(struct wm9081_priv), GFP_KERNEL); + if (wm9081 == NULL) + return -ENOMEM; + + codec = &wm9081->codec; + codec->hw_write = (hw_write_t)i2c_master_send; + wm9081->retune = i2c->dev.platform_data; + + i2c_set_clientdata(i2c, wm9081); + codec->control_data = i2c; + + codec->dev = &i2c->dev; + + return wm9081_register(wm9081); +} + +static __devexit int wm9081_i2c_remove(struct i2c_client *client) +{ + struct wm9081_priv *wm9081 = i2c_get_clientdata(client); + wm9081_unregister(wm9081); + return 0; +} + +static const struct i2c_device_id wm9081_i2c_id[] = { + { "wm9081", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm9081_i2c_id); + +static struct i2c_driver wm9081_i2c_driver = { + .driver = { + .name = "wm9081", + .owner = THIS_MODULE, + }, + .probe = wm9081_i2c_probe, + .remove = __devexit_p(wm9081_i2c_remove), + .id_table = wm9081_i2c_id, +}; + +static int __init wm9081_modinit(void) +{ + int ret; + + ret = i2c_add_driver(&wm9081_i2c_driver); + if (ret != 0) { + printk(KERN_ERR "Failed to register WM9081 I2C driver: %d\n", + ret); + } + + return ret; +} +module_init(wm9081_modinit); + +static void __exit wm9081_exit(void) +{ + i2c_del_driver(&wm9081_i2c_driver); +} +module_exit(wm9081_exit); + + +MODULE_DESCRIPTION("ASoC WM9081 driver"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/wm9081.h b/sound/soc/codecs/wm9081.h new file mode 100644 index 00000000000..42d3bc75702 --- /dev/null +++ b/sound/soc/codecs/wm9081.h @@ -0,0 +1,787 @@ +#ifndef WM9081_H +#define WM9081_H + +/* + * wm9081.c -- WM9081 ALSA SoC Audio driver + * + * Author: Mark Brown + * + * Copyright 2009 Wolfson Microelectronics plc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +extern struct snd_soc_dai wm9081_dai; +extern struct snd_soc_codec_device soc_codec_dev_wm9081; + +/* + * SYSCLK sources + */ +#define WM9081_SYSCLK_MCLK 1 /* Use MCLK without FLL */ +#define WM9081_SYSCLK_FLL_MCLK 2 /* Use MCLK, enabling FLL if required */ + +/* + * Register values. + */ +#define WM9081_SOFTWARE_RESET 0x00 +#define WM9081_ANALOGUE_LINEOUT 0x02 +#define WM9081_ANALOGUE_SPEAKER_PGA 0x03 +#define WM9081_VMID_CONTROL 0x04 +#define WM9081_BIAS_CONTROL_1 0x05 +#define WM9081_ANALOGUE_MIXER 0x07 +#define WM9081_ANTI_POP_CONTROL 0x08 +#define WM9081_ANALOGUE_SPEAKER_1 0x09 +#define WM9081_ANALOGUE_SPEAKER_2 0x0A +#define WM9081_POWER_MANAGEMENT 0x0B +#define WM9081_CLOCK_CONTROL_1 0x0C +#define WM9081_CLOCK_CONTROL_2 0x0D +#define WM9081_CLOCK_CONTROL_3 0x0E +#define WM9081_FLL_CONTROL_1 0x10 +#define WM9081_FLL_CONTROL_2 0x11 +#define WM9081_FLL_CONTROL_3 0x12 +#define WM9081_FLL_CONTROL_4 0x13 +#define WM9081_FLL_CONTROL_5 0x14 +#define WM9081_AUDIO_INTERFACE_1 0x16 +#define WM9081_AUDIO_INTERFACE_2 0x17 +#define WM9081_AUDIO_INTERFACE_3 0x18 +#define WM9081_AUDIO_INTERFACE_4 0x19 +#define WM9081_INTERRUPT_STATUS 0x1A +#define WM9081_INTERRUPT_STATUS_MASK 0x1B +#define WM9081_INTERRUPT_POLARITY 0x1C +#define WM9081_INTERRUPT_CONTROL 0x1D +#define WM9081_DAC_DIGITAL_1 0x1E +#define WM9081_DAC_DIGITAL_2 0x1F +#define WM9081_DRC_1 0x20 +#define WM9081_DRC_2 0x21 +#define WM9081_DRC_3 0x22 +#define WM9081_DRC_4 0x23 +#define WM9081_WRITE_SEQUENCER_1 0x26 +#define WM9081_WRITE_SEQUENCER_2 0x27 +#define WM9081_MW_SLAVE_1 0x28 +#define WM9081_EQ_1 0x2A +#define WM9081_EQ_2 0x2B +#define WM9081_EQ_3 0x2C +#define WM9081_EQ_4 0x2D +#define WM9081_EQ_5 0x2E +#define WM9081_EQ_6 0x2F +#define WM9081_EQ_7 0x30 +#define WM9081_EQ_8 0x31 +#define WM9081_EQ_9 0x32 +#define WM9081_EQ_10 0x33 +#define WM9081_EQ_11 0x34 +#define WM9081_EQ_12 0x35 +#define WM9081_EQ_13 0x36 +#define WM9081_EQ_14 0x37 +#define WM9081_EQ_15 0x38 +#define WM9081_EQ_16 0x39 +#define WM9081_EQ_17 0x3A +#define WM9081_EQ_18 0x3B +#define WM9081_EQ_19 0x3C +#define WM9081_EQ_20 0x3D + +#define WM9081_REGISTER_COUNT 55 +#define WM9081_MAX_REGISTER 0x3D + +/* + * Field Definitions. + */ + +/* + * R0 (0x00) - Software Reset + */ +#define WM9081_SW_RST_DEV_ID1_MASK 0xFFFF /* SW_RST_DEV_ID1 - [15:0] */ +#define WM9081_SW_RST_DEV_ID1_SHIFT 0 /* SW_RST_DEV_ID1 - [15:0] */ +#define WM9081_SW_RST_DEV_ID1_WIDTH 16 /* SW_RST_DEV_ID1 - [15:0] */ + +/* + * R2 (0x02) - Analogue Lineout + */ +#define WM9081_LINEOUT_MUTE 0x0080 /* LINEOUT_MUTE */ +#define WM9081_LINEOUT_MUTE_MASK 0x0080 /* LINEOUT_MUTE */ +#define WM9081_LINEOUT_MUTE_SHIFT 7 /* LINEOUT_MUTE */ +#define WM9081_LINEOUT_MUTE_WIDTH 1 /* LINEOUT_MUTE */ +#define WM9081_LINEOUTZC 0x0040 /* LINEOUTZC */ +#define WM9081_LINEOUTZC_MASK 0x0040 /* LINEOUTZC */ +#define WM9081_LINEOUTZC_SHIFT 6 /* LINEOUTZC */ +#define WM9081_LINEOUTZC_WIDTH 1 /* LINEOUTZC */ +#define WM9081_LINEOUT_VOL_MASK 0x003F /* LINEOUT_VOL - [5:0] */ +#define WM9081_LINEOUT_VOL_SHIFT 0 /* LINEOUT_VOL - [5:0] */ +#define WM9081_LINEOUT_VOL_WIDTH 6 /* LINEOUT_VOL - [5:0] */ + +/* + * R3 (0x03) - Analogue Speaker PGA + */ +#define WM9081_SPKPGA_MUTE 0x0080 /* SPKPGA_MUTE */ +#define WM9081_SPKPGA_MUTE_MASK 0x0080 /* SPKPGA_MUTE */ +#define WM9081_SPKPGA_MUTE_SHIFT 7 /* SPKPGA_MUTE */ +#define WM9081_SPKPGA_MUTE_WIDTH 1 /* SPKPGA_MUTE */ +#define WM9081_SPKPGAZC 0x0040 /* SPKPGAZC */ +#define WM9081_SPKPGAZC_MASK 0x0040 /* SPKPGAZC */ +#define WM9081_SPKPGAZC_SHIFT 6 /* SPKPGAZC */ +#define WM9081_SPKPGAZC_WIDTH 1 /* SPKPGAZC */ +#define WM9081_SPKPGA_VOL_MASK 0x003F /* SPKPGA_VOL - [5:0] */ +#define WM9081_SPKPGA_VOL_SHIFT 0 /* SPKPGA_VOL - [5:0] */ +#define WM9081_SPKPGA_VOL_WIDTH 6 /* SPKPGA_VOL - [5:0] */ + +/* + * R4 (0x04) - VMID Control + */ +#define WM9081_VMID_BUF_ENA 0x0020 /* VMID_BUF_ENA */ +#define WM9081_VMID_BUF_ENA_MASK 0x0020 /* VMID_BUF_ENA */ +#define WM9081_VMID_BUF_ENA_SHIFT 5 /* VMID_BUF_ENA */ +#define WM9081_VMID_BUF_ENA_WIDTH 1 /* VMID_BUF_ENA */ +#define WM9081_VMID_RAMP 0x0008 /* VMID_RAMP */ +#define WM9081_VMID_RAMP_MASK 0x0008 /* VMID_RAMP */ +#define WM9081_VMID_RAMP_SHIFT 3 /* VMID_RAMP */ +#define WM9081_VMID_RAMP_WIDTH 1 /* VMID_RAMP */ +#define WM9081_VMID_SEL_MASK 0x0006 /* VMID_SEL - [2:1] */ +#define WM9081_VMID_SEL_SHIFT 1 /* VMID_SEL - [2:1] */ +#define WM9081_VMID_SEL_WIDTH 2 /* VMID_SEL - [2:1] */ +#define WM9081_VMID_FAST_ST 0x0001 /* VMID_FAST_ST */ +#define WM9081_VMID_FAST_ST_MASK 0x0001 /* VMID_FAST_ST */ +#define WM9081_VMID_FAST_ST_SHIFT 0 /* VMID_FAST_ST */ +#define WM9081_VMID_FAST_ST_WIDTH 1 /* VMID_FAST_ST */ + +/* + * R5 (0x05) - Bias Control 1 + */ +#define WM9081_BIAS_SRC 0x0040 /* BIAS_SRC */ +#define WM9081_BIAS_SRC_MASK 0x0040 /* BIAS_SRC */ +#define WM9081_BIAS_SRC_SHIFT 6 /* BIAS_SRC */ +#define WM9081_BIAS_SRC_WIDTH 1 /* BIAS_SRC */ +#define WM9081_STBY_BIAS_LVL 0x0020 /* STBY_BIAS_LVL */ +#define WM9081_STBY_BIAS_LVL_MASK 0x0020 /* STBY_BIAS_LVL */ +#define WM9081_STBY_BIAS_LVL_SHIFT 5 /* STBY_BIAS_LVL */ +#define WM9081_STBY_BIAS_LVL_WIDTH 1 /* STBY_BIAS_LVL */ +#define WM9081_STBY_BIAS_ENA 0x0010 /* STBY_BIAS_ENA */ +#define WM9081_STBY_BIAS_ENA_MASK 0x0010 /* STBY_BIAS_ENA */ +#define WM9081_STBY_BIAS_ENA_SHIFT 4 /* STBY_BIAS_ENA */ +#define WM9081_STBY_BIAS_ENA_WIDTH 1 /* STBY_BIAS_ENA */ +#define WM9081_BIAS_LVL_MASK 0x000C /* BIAS_LVL - [3:2] */ +#define WM9081_BIAS_LVL_SHIFT 2 /* BIAS_LVL - [3:2] */ +#define WM9081_BIAS_LVL_WIDTH 2 /* BIAS_LVL - [3:2] */ +#define WM9081_BIAS_ENA 0x0002 /* BIAS_ENA */ +#define WM9081_BIAS_ENA_MASK 0x0002 /* BIAS_ENA */ +#define WM9081_BIAS_ENA_SHIFT 1 /* BIAS_ENA */ +#define WM9081_BIAS_ENA_WIDTH 1 /* BIAS_ENA */ +#define WM9081_STARTUP_BIAS_ENA 0x0001 /* STARTUP_BIAS_ENA */ +#define WM9081_STARTUP_BIAS_ENA_MASK 0x0001 /* STARTUP_BIAS_ENA */ +#define WM9081_STARTUP_BIAS_ENA_SHIFT 0 /* STARTUP_BIAS_ENA */ +#define WM9081_STARTUP_BIAS_ENA_WIDTH 1 /* STARTUP_BIAS_ENA */ + +/* + * R7 (0x07) - Analogue Mixer + */ +#define WM9081_DAC_SEL 0x0010 /* DAC_SEL */ +#define WM9081_DAC_SEL_MASK 0x0010 /* DAC_SEL */ +#define WM9081_DAC_SEL_SHIFT 4 /* DAC_SEL */ +#define WM9081_DAC_SEL_WIDTH 1 /* DAC_SEL */ +#define WM9081_IN2_VOL 0x0008 /* IN2_VOL */ +#define WM9081_IN2_VOL_MASK 0x0008 /* IN2_VOL */ +#define WM9081_IN2_VOL_SHIFT 3 /* IN2_VOL */ +#define WM9081_IN2_VOL_WIDTH 1 /* IN2_VOL */ +#define WM9081_IN2_ENA 0x0004 /* IN2_ENA */ +#define WM9081_IN2_ENA_MASK 0x0004 /* IN2_ENA */ +#define WM9081_IN2_ENA_SHIFT 2 /* IN2_ENA */ +#define WM9081_IN2_ENA_WIDTH 1 /* IN2_ENA */ +#define WM9081_IN1_VOL 0x0002 /* IN1_VOL */ +#define WM9081_IN1_VOL_MASK 0x0002 /* IN1_VOL */ +#define WM9081_IN1_VOL_SHIFT 1 /* IN1_VOL */ +#define WM9081_IN1_VOL_WIDTH 1 /* IN1_VOL */ +#define WM9081_IN1_ENA 0x0001 /* IN1_ENA */ +#define WM9081_IN1_ENA_MASK 0x0001 /* IN1_ENA */ +#define WM9081_IN1_ENA_SHIFT 0 /* IN1_ENA */ +#define WM9081_IN1_ENA_WIDTH 1 /* IN1_ENA */ + +/* + * R8 (0x08) - Anti Pop Control + */ +#define WM9081_LINEOUT_DISCH 0x0004 /* LINEOUT_DISCH */ +#define WM9081_LINEOUT_DISCH_MASK 0x0004 /* LINEOUT_DISCH */ +#define WM9081_LINEOUT_DISCH_SHIFT 2 /* LINEOUT_DISCH */ +#define WM9081_LINEOUT_DISCH_WIDTH 1 /* LINEOUT_DISCH */ +#define WM9081_LINEOUT_VROI 0x0002 /* LINEOUT_VROI */ +#define WM9081_LINEOUT_VROI_MASK 0x0002 /* LINEOUT_VROI */ +#define WM9081_LINEOUT_VROI_SHIFT 1 /* LINEOUT_VROI */ +#define WM9081_LINEOUT_VROI_WIDTH 1 /* LINEOUT_VROI */ +#define WM9081_LINEOUT_CLAMP 0x0001 /* LINEOUT_CLAMP */ +#define WM9081_LINEOUT_CLAMP_MASK 0x0001 /* LINEOUT_CLAMP */ +#define WM9081_LINEOUT_CLAMP_SHIFT 0 /* LINEOUT_CLAMP */ +#define WM9081_LINEOUT_CLAMP_WIDTH 1 /* LINEOUT_CLAMP */ + +/* + * R9 (0x09) - Analogue Speaker 1 + */ +#define WM9081_SPK_DCGAIN_MASK 0x0038 /* SPK_DCGAIN - [5:3] */ +#define WM9081_SPK_DCGAIN_SHIFT 3 /* SPK_DCGAIN - [5:3] */ +#define WM9081_SPK_DCGAIN_WIDTH 3 /* SPK_DCGAIN - [5:3] */ +#define WM9081_SPK_ACGAIN_MASK 0x0007 /* SPK_ACGAIN - [2:0] */ +#define WM9081_SPK_ACGAIN_SHIFT 0 /* SPK_ACGAIN - [2:0] */ +#define WM9081_SPK_ACGAIN_WIDTH 3 /* SPK_ACGAIN - [2:0] */ + +/* + * R10 (0x0A) - Analogue Speaker 2 + */ +#define WM9081_SPK_MODE 0x0040 /* SPK_MODE */ +#define WM9081_SPK_MODE_MASK 0x0040 /* SPK_MODE */ +#define WM9081_SPK_MODE_SHIFT 6 /* SPK_MODE */ +#define WM9081_SPK_MODE_WIDTH 1 /* SPK_MODE */ +#define WM9081_SPK_INV_MUTE 0x0010 /* SPK_INV_MUTE */ +#define WM9081_SPK_INV_MUTE_MASK 0x0010 /* SPK_INV_MUTE */ +#define WM9081_SPK_INV_MUTE_SHIFT 4 /* SPK_INV_MUTE */ +#define WM9081_SPK_INV_MUTE_WIDTH 1 /* SPK_INV_MUTE */ +#define WM9081_OUT_SPK_CTRL 0x0008 /* OUT_SPK_CTRL */ +#define WM9081_OUT_SPK_CTRL_MASK 0x0008 /* OUT_SPK_CTRL */ +#define WM9081_OUT_SPK_CTRL_SHIFT 3 /* OUT_SPK_CTRL */ +#define WM9081_OUT_SPK_CTRL_WIDTH 1 /* OUT_SPK_CTRL */ + +/* + * R11 (0x0B) - Power Management + */ +#define WM9081_TSHUT_ENA 0x0100 /* TSHUT_ENA */ +#define WM9081_TSHUT_ENA_MASK 0x0100 /* TSHUT_ENA */ +#define WM9081_TSHUT_ENA_SHIFT 8 /* TSHUT_ENA */ +#define WM9081_TSHUT_ENA_WIDTH 1 /* TSHUT_ENA */ +#define WM9081_TSENSE_ENA 0x0080 /* TSENSE_ENA */ +#define WM9081_TSENSE_ENA_MASK 0x0080 /* TSENSE_ENA */ +#define WM9081_TSENSE_ENA_SHIFT 7 /* TSENSE_ENA */ +#define WM9081_TSENSE_ENA_WIDTH 1 /* TSENSE_ENA */ +#define WM9081_TEMP_SHUT 0x0040 /* TEMP_SHUT */ +#define WM9081_TEMP_SHUT_MASK 0x0040 /* TEMP_SHUT */ +#define WM9081_TEMP_SHUT_SHIFT 6 /* TEMP_SHUT */ +#define WM9081_TEMP_SHUT_WIDTH 1 /* TEMP_SHUT */ +#define WM9081_LINEOUT_ENA 0x0010 /* LINEOUT_ENA */ +#define WM9081_LINEOUT_ENA_MASK 0x0010 /* LINEOUT_ENA */ +#define WM9081_LINEOUT_ENA_SHIFT 4 /* LINEOUT_ENA */ +#define WM9081_LINEOUT_ENA_WIDTH 1 /* LINEOUT_ENA */ +#define WM9081_SPKPGA_ENA 0x0004 /* SPKPGA_ENA */ +#define WM9081_SPKPGA_ENA_MASK 0x0004 /* SPKPGA_ENA */ +#define WM9081_SPKPGA_ENA_SHIFT 2 /* SPKPGA_ENA */ +#define WM9081_SPKPGA_ENA_WIDTH 1 /* SPKPGA_ENA */ +#define WM9081_SPK_ENA 0x0002 /* SPK_ENA */ +#define WM9081_SPK_ENA_MASK 0x0002 /* SPK_ENA */ +#define WM9081_SPK_ENA_SHIFT 1 /* SPK_ENA */ +#define WM9081_SPK_ENA_WIDTH 1 /* SPK_ENA */ +#define WM9081_DAC_ENA 0x0001 /* DAC_ENA */ +#define WM9081_DAC_ENA_MASK 0x0001 /* DAC_ENA */ +#define WM9081_DAC_ENA_SHIFT 0 /* DAC_ENA */ +#define WM9081_DAC_ENA_WIDTH 1 /* DAC_ENA */ + +/* + * R12 (0x0C) - Clock Control 1 + */ +#define WM9081_CLK_OP_DIV_MASK 0x1C00 /* CLK_OP_DIV - [12:10] */ +#define WM9081_CLK_OP_DIV_SHIFT 10 /* CLK_OP_DIV - [12:10] */ +#define WM9081_CLK_OP_DIV_WIDTH 3 /* CLK_OP_DIV - [12:10] */ +#define WM9081_CLK_TO_DIV_MASK 0x0300 /* CLK_TO_DIV - [9:8] */ +#define WM9081_CLK_TO_DIV_SHIFT 8 /* CLK_TO_DIV - [9:8] */ +#define WM9081_CLK_TO_DIV_WIDTH 2 /* CLK_TO_DIV - [9:8] */ +#define WM9081_MCLKDIV2 0x0080 /* MCLKDIV2 */ +#define WM9081_MCLKDIV2_MASK 0x0080 /* MCLKDIV2 */ +#define WM9081_MCLKDIV2_SHIFT 7 /* MCLKDIV2 */ +#define WM9081_MCLKDIV2_WIDTH 1 /* MCLKDIV2 */ + +/* + * R13 (0x0D) - Clock Control 2 + */ +#define WM9081_CLK_SYS_RATE_MASK 0x00F0 /* CLK_SYS_RATE - [7:4] */ +#define WM9081_CLK_SYS_RATE_SHIFT 4 /* CLK_SYS_RATE - [7:4] */ +#define WM9081_CLK_SYS_RATE_WIDTH 4 /* CLK_SYS_RATE - [7:4] */ +#define WM9081_SAMPLE_RATE_MASK 0x000F /* SAMPLE_RATE - [3:0] */ +#define WM9081_SAMPLE_RATE_SHIFT 0 /* SAMPLE_RATE - [3:0] */ +#define WM9081_SAMPLE_RATE_WIDTH 4 /* SAMPLE_RATE - [3:0] */ + +/* + * R14 (0x0E) - Clock Control 3 + */ +#define WM9081_CLK_SRC_SEL 0x2000 /* CLK_SRC_SEL */ +#define WM9081_CLK_SRC_SEL_MASK 0x2000 /* CLK_SRC_SEL */ +#define WM9081_CLK_SRC_SEL_SHIFT 13 /* CLK_SRC_SEL */ +#define WM9081_CLK_SRC_SEL_WIDTH 1 /* CLK_SRC_SEL */ +#define WM9081_CLK_OP_ENA 0x0020 /* CLK_OP_ENA */ +#define WM9081_CLK_OP_ENA_MASK 0x0020 /* CLK_OP_ENA */ +#define WM9081_CLK_OP_ENA_SHIFT 5 /* CLK_OP_ENA */ +#define WM9081_CLK_OP_ENA_WIDTH 1 /* CLK_OP_ENA */ +#define WM9081_CLK_TO_ENA 0x0004 /* CLK_TO_ENA */ +#define WM9081_CLK_TO_ENA_MASK 0x0004 /* CLK_TO_ENA */ +#define WM9081_CLK_TO_ENA_SHIFT 2 /* CLK_TO_ENA */ +#define WM9081_CLK_TO_ENA_WIDTH 1 /* CLK_TO_ENA */ +#define WM9081_CLK_DSP_ENA 0x0002 /* CLK_DSP_ENA */ +#define WM9081_CLK_DSP_ENA_MASK 0x0002 /* CLK_DSP_ENA */ +#define WM9081_CLK_DSP_ENA_SHIFT 1 /* CLK_DSP_ENA */ +#define WM9081_CLK_DSP_ENA_WIDTH 1 /* CLK_DSP_ENA */ +#define WM9081_CLK_SYS_ENA 0x0001 /* CLK_SYS_ENA */ +#define WM9081_CLK_SYS_ENA_MASK 0x0001 /* CLK_SYS_ENA */ +#define WM9081_CLK_SYS_ENA_SHIFT 0 /* CLK_SYS_ENA */ +#define WM9081_CLK_SYS_ENA_WIDTH 1 /* CLK_SYS_ENA */ + +/* + * R16 (0x10) - FLL Control 1 + */ +#define WM9081_FLL_HOLD 0x0008 /* FLL_HOLD */ +#define WM9081_FLL_HOLD_MASK 0x0008 /* FLL_HOLD */ +#define WM9081_FLL_HOLD_SHIFT 3 /* FLL_HOLD */ +#define WM9081_FLL_HOLD_WIDTH 1 /* FLL_HOLD */ +#define WM9081_FLL_FRAC 0x0004 /* FLL_FRAC */ +#define WM9081_FLL_FRAC_MASK 0x0004 /* FLL_FRAC */ +#define WM9081_FLL_FRAC_SHIFT 2 /* FLL_FRAC */ +#define WM9081_FLL_FRAC_WIDTH 1 /* FLL_FRAC */ +#define WM9081_FLL_ENA 0x0001 /* FLL_ENA */ +#define WM9081_FLL_ENA_MASK 0x0001 /* FLL_ENA */ +#define WM9081_FLL_ENA_SHIFT 0 /* FLL_ENA */ +#define WM9081_FLL_ENA_WIDTH 1 /* FLL_ENA */ + +/* + * R17 (0x11) - FLL Control 2 + */ +#define WM9081_FLL_OUTDIV_MASK 0x0700 /* FLL_OUTDIV - [10:8] */ +#define WM9081_FLL_OUTDIV_SHIFT 8 /* FLL_OUTDIV - [10:8] */ +#define WM9081_FLL_OUTDIV_WIDTH 3 /* FLL_OUTDIV - [10:8] */ +#define WM9081_FLL_CTRL_RATE_MASK 0x0070 /* FLL_CTRL_RATE - [6:4] */ +#define WM9081_FLL_CTRL_RATE_SHIFT 4 /* FLL_CTRL_RATE - [6:4] */ +#define WM9081_FLL_CTRL_RATE_WIDTH 3 /* FLL_CTRL_RATE - [6:4] */ +#define WM9081_FLL_FRATIO_MASK 0x0007 /* FLL_FRATIO - [2:0] */ +#define WM9081_FLL_FRATIO_SHIFT 0 /* FLL_FRATIO - [2:0] */ +#define WM9081_FLL_FRATIO_WIDTH 3 /* FLL_FRATIO - [2:0] */ + +/* + * R18 (0x12) - FLL Control 3 + */ +#define WM9081_FLL_K_MASK 0xFFFF /* FLL_K - [15:0] */ +#define WM9081_FLL_K_SHIFT 0 /* FLL_K - [15:0] */ +#define WM9081_FLL_K_WIDTH 16 /* FLL_K - [15:0] */ + +/* + * R19 (0x13) - FLL Control 4 + */ +#define WM9081_FLL_N_MASK 0x7FE0 /* FLL_N - [14:5] */ +#define WM9081_FLL_N_SHIFT 5 /* FLL_N - [14:5] */ +#define WM9081_FLL_N_WIDTH 10 /* FLL_N - [14:5] */ +#define WM9081_FLL_GAIN_MASK 0x000F /* FLL_GAIN - [3:0] */ +#define WM9081_FLL_GAIN_SHIFT 0 /* FLL_GAIN - [3:0] */ +#define WM9081_FLL_GAIN_WIDTH 4 /* FLL_GAIN - [3:0] */ + +/* + * R20 (0x14) - FLL Control 5 + */ +#define WM9081_FLL_CLK_REF_DIV_MASK 0x0018 /* FLL_CLK_REF_DIV - [4:3] */ +#define WM9081_FLL_CLK_REF_DIV_SHIFT 3 /* FLL_CLK_REF_DIV - [4:3] */ +#define WM9081_FLL_CLK_REF_DIV_WIDTH 2 /* FLL_CLK_REF_DIV - [4:3] */ +#define WM9081_FLL_CLK_SRC_MASK 0x0003 /* FLL_CLK_SRC - [1:0] */ +#define WM9081_FLL_CLK_SRC_SHIFT 0 /* FLL_CLK_SRC - [1:0] */ +#define WM9081_FLL_CLK_SRC_WIDTH 2 /* FLL_CLK_SRC - [1:0] */ + +/* + * R22 (0x16) - Audio Interface 1 + */ +#define WM9081_AIFDAC_CHAN 0x0040 /* AIFDAC_CHAN */ +#define WM9081_AIFDAC_CHAN_MASK 0x0040 /* AIFDAC_CHAN */ +#define WM9081_AIFDAC_CHAN_SHIFT 6 /* AIFDAC_CHAN */ +#define WM9081_AIFDAC_CHAN_WIDTH 1 /* AIFDAC_CHAN */ +#define WM9081_AIFDAC_TDM_SLOT_MASK 0x0030 /* AIFDAC_TDM_SLOT - [5:4] */ +#define WM9081_AIFDAC_TDM_SLOT_SHIFT 4 /* AIFDAC_TDM_SLOT - [5:4] */ +#define WM9081_AIFDAC_TDM_SLOT_WIDTH 2 /* AIFDAC_TDM_SLOT - [5:4] */ +#define WM9081_AIFDAC_TDM_MODE_MASK 0x000C /* AIFDAC_TDM_MODE - [3:2] */ +#define WM9081_AIFDAC_TDM_MODE_SHIFT 2 /* AIFDAC_TDM_MODE - [3:2] */ +#define WM9081_AIFDAC_TDM_MODE_WIDTH 2 /* AIFDAC_TDM_MODE - [3:2] */ +#define WM9081_DAC_COMP 0x0002 /* DAC_COMP */ +#define WM9081_DAC_COMP_MASK 0x0002 /* DAC_COMP */ +#define WM9081_DAC_COMP_SHIFT 1 /* DAC_COMP */ +#define WM9081_DAC_COMP_WIDTH 1 /* DAC_COMP */ +#define WM9081_DAC_COMPMODE 0x0001 /* DAC_COMPMODE */ +#define WM9081_DAC_COMPMODE_MASK 0x0001 /* DAC_COMPMODE */ +#define WM9081_DAC_COMPMODE_SHIFT 0 /* DAC_COMPMODE */ +#define WM9081_DAC_COMPMODE_WIDTH 1 /* DAC_COMPMODE */ + +/* + * R23 (0x17) - Audio Interface 2 + */ +#define WM9081_AIF_TRIS 0x0200 /* AIF_TRIS */ +#define WM9081_AIF_TRIS_MASK 0x0200 /* AIF_TRIS */ +#define WM9081_AIF_TRIS_SHIFT 9 /* AIF_TRIS */ +#define WM9081_AIF_TRIS_WIDTH 1 /* AIF_TRIS */ +#define WM9081_DAC_DAT_INV 0x0100 /* DAC_DAT_INV */ +#define WM9081_DAC_DAT_INV_MASK 0x0100 /* DAC_DAT_INV */ +#define WM9081_DAC_DAT_INV_SHIFT 8 /* DAC_DAT_INV */ +#define WM9081_DAC_DAT_INV_WIDTH 1 /* DAC_DAT_INV */ +#define WM9081_AIF_BCLK_INV 0x0080 /* AIF_BCLK_INV */ +#define WM9081_AIF_BCLK_INV_MASK 0x0080 /* AIF_BCLK_INV */ +#define WM9081_AIF_BCLK_INV_SHIFT 7 /* AIF_BCLK_INV */ +#define WM9081_AIF_BCLK_INV_WIDTH 1 /* AIF_BCLK_INV */ +#define WM9081_BCLK_DIR 0x0040 /* BCLK_DIR */ +#define WM9081_BCLK_DIR_MASK 0x0040 /* BCLK_DIR */ +#define WM9081_BCLK_DIR_SHIFT 6 /* BCLK_DIR */ +#define WM9081_BCLK_DIR_WIDTH 1 /* BCLK_DIR */ +#define WM9081_LRCLK_DIR 0x0020 /* LRCLK_DIR */ +#define WM9081_LRCLK_DIR_MASK 0x0020 /* LRCLK_DIR */ +#define WM9081_LRCLK_DIR_SHIFT 5 /* LRCLK_DIR */ +#define WM9081_LRCLK_DIR_WIDTH 1 /* LRCLK_DIR */ +#define WM9081_AIF_LRCLK_INV 0x0010 /* AIF_LRCLK_INV */ +#define WM9081_AIF_LRCLK_INV_MASK 0x0010 /* AIF_LRCLK_INV */ +#define WM9081_AIF_LRCLK_INV_SHIFT 4 /* AIF_LRCLK_INV */ +#define WM9081_AIF_LRCLK_INV_WIDTH 1 /* AIF_LRCLK_INV */ +#define WM9081_AIF_WL_MASK 0x000C /* AIF_WL - [3:2] */ +#define WM9081_AIF_WL_SHIFT 2 /* AIF_WL - [3:2] */ +#define WM9081_AIF_WL_WIDTH 2 /* AIF_WL - [3:2] */ +#define WM9081_AIF_FMT_MASK 0x0003 /* AIF_FMT - [1:0] */ +#define WM9081_AIF_FMT_SHIFT 0 /* AIF_FMT - [1:0] */ +#define WM9081_AIF_FMT_WIDTH 2 /* AIF_FMT - [1:0] */ + +/* + * R24 (0x18) - Audio Interface 3 + */ +#define WM9081_BCLK_DIV_MASK 0x001F /* BCLK_DIV - [4:0] */ +#define WM9081_BCLK_DIV_SHIFT 0 /* BCLK_DIV - [4:0] */ +#define WM9081_BCLK_DIV_WIDTH 5 /* BCLK_DIV - [4:0] */ + +/* + * R25 (0x19) - Audio Interface 4 + */ +#define WM9081_LRCLK_RATE_MASK 0x07FF /* LRCLK_RATE - [10:0] */ +#define WM9081_LRCLK_RATE_SHIFT 0 /* LRCLK_RATE - [10:0] */ +#define WM9081_LRCLK_RATE_WIDTH 11 /* LRCLK_RATE - [10:0] */ + +/* + * R26 (0x1A) - Interrupt Status + */ +#define WM9081_WSEQ_BUSY_EINT 0x0004 /* WSEQ_BUSY_EINT */ +#define WM9081_WSEQ_BUSY_EINT_MASK 0x0004 /* WSEQ_BUSY_EINT */ +#define WM9081_WSEQ_BUSY_EINT_SHIFT 2 /* WSEQ_BUSY_EINT */ +#define WM9081_WSEQ_BUSY_EINT_WIDTH 1 /* WSEQ_BUSY_EINT */ +#define WM9081_TSHUT_EINT 0x0001 /* TSHUT_EINT */ +#define WM9081_TSHUT_EINT_MASK 0x0001 /* TSHUT_EINT */ +#define WM9081_TSHUT_EINT_SHIFT 0 /* TSHUT_EINT */ +#define WM9081_TSHUT_EINT_WIDTH 1 /* TSHUT_EINT */ + +/* + * R27 (0x1B) - Interrupt Status Mask + */ +#define WM9081_IM_WSEQ_BUSY_EINT 0x0004 /* IM_WSEQ_BUSY_EINT */ +#define WM9081_IM_WSEQ_BUSY_EINT_MASK 0x0004 /* IM_WSEQ_BUSY_EINT */ +#define WM9081_IM_WSEQ_BUSY_EINT_SHIFT 2 /* IM_WSEQ_BUSY_EINT */ +#define WM9081_IM_WSEQ_BUSY_EINT_WIDTH 1 /* IM_WSEQ_BUSY_EINT */ +#define WM9081_IM_TSHUT_EINT 0x0001 /* IM_TSHUT_EINT */ +#define WM9081_IM_TSHUT_EINT_MASK 0x0001 /* IM_TSHUT_EINT */ +#define WM9081_IM_TSHUT_EINT_SHIFT 0 /* IM_TSHUT_EINT */ +#define WM9081_IM_TSHUT_EINT_WIDTH 1 /* IM_TSHUT_EINT */ + +/* + * R28 (0x1C) - Interrupt Polarity + */ +#define WM9081_TSHUT_INV 0x0001 /* TSHUT_INV */ +#define WM9081_TSHUT_INV_MASK 0x0001 /* TSHUT_INV */ +#define WM9081_TSHUT_INV_SHIFT 0 /* TSHUT_INV */ +#define WM9081_TSHUT_INV_WIDTH 1 /* TSHUT_INV */ + +/* + * R29 (0x1D) - Interrupt Control + */ +#define WM9081_IRQ_POL 0x8000 /* IRQ_POL */ +#define WM9081_IRQ_POL_MASK 0x8000 /* IRQ_POL */ +#define WM9081_IRQ_POL_SHIFT 15 /* IRQ_POL */ +#define WM9081_IRQ_POL_WIDTH 1 /* IRQ_POL */ +#define WM9081_IRQ_OP_CTRL 0x0001 /* IRQ_OP_CTRL */ +#define WM9081_IRQ_OP_CTRL_MASK 0x0001 /* IRQ_OP_CTRL */ +#define WM9081_IRQ_OP_CTRL_SHIFT 0 /* IRQ_OP_CTRL */ +#define WM9081_IRQ_OP_CTRL_WIDTH 1 /* IRQ_OP_CTRL */ + +/* + * R30 (0x1E) - DAC Digital 1 + */ +#define WM9081_DAC_VOL_MASK 0x00FF /* DAC_VOL - [7:0] */ +#define WM9081_DAC_VOL_SHIFT 0 /* DAC_VOL - [7:0] */ +#define WM9081_DAC_VOL_WIDTH 8 /* DAC_VOL - [7:0] */ + +/* + * R31 (0x1F) - DAC Digital 2 + */ +#define WM9081_DAC_MUTERATE 0x0400 /* DAC_MUTERATE */ +#define WM9081_DAC_MUTERATE_MASK 0x0400 /* DAC_MUTERATE */ +#define WM9081_DAC_MUTERATE_SHIFT 10 /* DAC_MUTERATE */ +#define WM9081_DAC_MUTERATE_WIDTH 1 /* DAC_MUTERATE */ +#define WM9081_DAC_MUTEMODE 0x0200 /* DAC_MUTEMODE */ +#define WM9081_DAC_MUTEMODE_MASK 0x0200 /* DAC_MUTEMODE */ +#define WM9081_DAC_MUTEMODE_SHIFT 9 /* DAC_MUTEMODE */ +#define WM9081_DAC_MUTEMODE_WIDTH 1 /* DAC_MUTEMODE */ +#define WM9081_DAC_MUTE 0x0008 /* DAC_MUTE */ +#define WM9081_DAC_MUTE_MASK 0x0008 /* DAC_MUTE */ +#define WM9081_DAC_MUTE_SHIFT 3 /* DAC_MUTE */ +#define WM9081_DAC_MUTE_WIDTH 1 /* DAC_MUTE */ +#define WM9081_DEEMPH_MASK 0x0006 /* DEEMPH - [2:1] */ +#define WM9081_DEEMPH_SHIFT 1 /* DEEMPH - [2:1] */ +#define WM9081_DEEMPH_WIDTH 2 /* DEEMPH - [2:1] */ + +/* + * R32 (0x20) - DRC 1 + */ +#define WM9081_DRC_ENA 0x8000 /* DRC_ENA */ +#define WM9081_DRC_ENA_MASK 0x8000 /* DRC_ENA */ +#define WM9081_DRC_ENA_SHIFT 15 /* DRC_ENA */ +#define WM9081_DRC_ENA_WIDTH 1 /* DRC_ENA */ +#define WM9081_DRC_STARTUP_GAIN_MASK 0x07C0 /* DRC_STARTUP_GAIN - [10:6] */ +#define WM9081_DRC_STARTUP_GAIN_SHIFT 6 /* DRC_STARTUP_GAIN - [10:6] */ +#define WM9081_DRC_STARTUP_GAIN_WIDTH 5 /* DRC_STARTUP_GAIN - [10:6] */ +#define WM9081_DRC_FF_DLY 0x0020 /* DRC_FF_DLY */ +#define WM9081_DRC_FF_DLY_MASK 0x0020 /* DRC_FF_DLY */ +#define WM9081_DRC_FF_DLY_SHIFT 5 /* DRC_FF_DLY */ +#define WM9081_DRC_FF_DLY_WIDTH 1 /* DRC_FF_DLY */ +#define WM9081_DRC_QR 0x0004 /* DRC_QR */ +#define WM9081_DRC_QR_MASK 0x0004 /* DRC_QR */ +#define WM9081_DRC_QR_SHIFT 2 /* DRC_QR */ +#define WM9081_DRC_QR_WIDTH 1 /* DRC_QR */ +#define WM9081_DRC_ANTICLIP 0x0002 /* DRC_ANTICLIP */ +#define WM9081_DRC_ANTICLIP_MASK 0x0002 /* DRC_ANTICLIP */ +#define WM9081_DRC_ANTICLIP_SHIFT 1 /* DRC_ANTICLIP */ +#define WM9081_DRC_ANTICLIP_WIDTH 1 /* DRC_ANTICLIP */ + +/* + * R33 (0x21) - DRC 2 + */ +#define WM9081_DRC_ATK_MASK 0xF000 /* DRC_ATK - [15:12] */ +#define WM9081_DRC_ATK_SHIFT 12 /* DRC_ATK - [15:12] */ +#define WM9081_DRC_ATK_WIDTH 4 /* DRC_ATK - [15:12] */ +#define WM9081_DRC_DCY_MASK 0x0F00 /* DRC_DCY - [11:8] */ +#define WM9081_DRC_DCY_SHIFT 8 /* DRC_DCY - [11:8] */ +#define WM9081_DRC_DCY_WIDTH 4 /* DRC_DCY - [11:8] */ +#define WM9081_DRC_QR_THR_MASK 0x00C0 /* DRC_QR_THR - [7:6] */ +#define WM9081_DRC_QR_THR_SHIFT 6 /* DRC_QR_THR - [7:6] */ +#define WM9081_DRC_QR_THR_WIDTH 2 /* DRC_QR_THR - [7:6] */ +#define WM9081_DRC_QR_DCY_MASK 0x0030 /* DRC_QR_DCY - [5:4] */ +#define WM9081_DRC_QR_DCY_SHIFT 4 /* DRC_QR_DCY - [5:4] */ +#define WM9081_DRC_QR_DCY_WIDTH 2 /* DRC_QR_DCY - [5:4] */ +#define WM9081_DRC_MINGAIN_MASK 0x000C /* DRC_MINGAIN - [3:2] */ +#define WM9081_DRC_MINGAIN_SHIFT 2 /* DRC_MINGAIN - [3:2] */ +#define WM9081_DRC_MINGAIN_WIDTH 2 /* DRC_MINGAIN - [3:2] */ +#define WM9081_DRC_MAXGAIN_MASK 0x0003 /* DRC_MAXGAIN - [1:0] */ +#define WM9081_DRC_MAXGAIN_SHIFT 0 /* DRC_MAXGAIN - [1:0] */ +#define WM9081_DRC_MAXGAIN_WIDTH 2 /* DRC_MAXGAIN - [1:0] */ + +/* + * R34 (0x22) - DRC 3 + */ +#define WM9081_DRC_HI_COMP_MASK 0x0038 /* DRC_HI_COMP - [5:3] */ +#define WM9081_DRC_HI_COMP_SHIFT 3 /* DRC_HI_COMP - [5:3] */ +#define WM9081_DRC_HI_COMP_WIDTH 3 /* DRC_HI_COMP - [5:3] */ +#define WM9081_DRC_LO_COMP_MASK 0x0007 /* DRC_LO_COMP - [2:0] */ +#define WM9081_DRC_LO_COMP_SHIFT 0 /* DRC_LO_COMP - [2:0] */ +#define WM9081_DRC_LO_COMP_WIDTH 3 /* DRC_LO_COMP - [2:0] */ + +/* + * R35 (0x23) - DRC 4 + */ +#define WM9081_DRC_KNEE_IP_MASK 0x07E0 /* DRC_KNEE_IP - [10:5] */ +#define WM9081_DRC_KNEE_IP_SHIFT 5 /* DRC_KNEE_IP - [10:5] */ +#define WM9081_DRC_KNEE_IP_WIDTH 6 /* DRC_KNEE_IP - [10:5] */ +#define WM9081_DRC_KNEE_OP_MASK 0x001F /* DRC_KNEE_OP - [4:0] */ +#define WM9081_DRC_KNEE_OP_SHIFT 0 /* DRC_KNEE_OP - [4:0] */ +#define WM9081_DRC_KNEE_OP_WIDTH 5 /* DRC_KNEE_OP - [4:0] */ + +/* + * R38 (0x26) - Write Sequencer 1 + */ +#define WM9081_WSEQ_ENA 0x8000 /* WSEQ_ENA */ +#define WM9081_WSEQ_ENA_MASK 0x8000 /* WSEQ_ENA */ +#define WM9081_WSEQ_ENA_SHIFT 15 /* WSEQ_ENA */ +#define WM9081_WSEQ_ENA_WIDTH 1 /* WSEQ_ENA */ +#define WM9081_WSEQ_ABORT 0x0200 /* WSEQ_ABORT */ +#define WM9081_WSEQ_ABORT_MASK 0x0200 /* WSEQ_ABORT */ +#define WM9081_WSEQ_ABORT_SHIFT 9 /* WSEQ_ABORT */ +#define WM9081_WSEQ_ABORT_WIDTH 1 /* WSEQ_ABORT */ +#define WM9081_WSEQ_START 0x0100 /* WSEQ_START */ +#define WM9081_WSEQ_START_MASK 0x0100 /* WSEQ_START */ +#define WM9081_WSEQ_START_SHIFT 8 /* WSEQ_START */ +#define WM9081_WSEQ_START_WIDTH 1 /* WSEQ_START */ +#define WM9081_WSEQ_START_INDEX_MASK 0x007F /* WSEQ_START_INDEX - [6:0] */ +#define WM9081_WSEQ_START_INDEX_SHIFT 0 /* WSEQ_START_INDEX - [6:0] */ +#define WM9081_WSEQ_START_INDEX_WIDTH 7 /* WSEQ_START_INDEX - [6:0] */ + +/* + * R39 (0x27) - Write Sequencer 2 + */ +#define WM9081_WSEQ_CURRENT_INDEX_MASK 0x07F0 /* WSEQ_CURRENT_INDEX - [10:4] */ +#define WM9081_WSEQ_CURRENT_INDEX_SHIFT 4 /* WSEQ_CURRENT_INDEX - [10:4] */ +#define WM9081_WSEQ_CURRENT_INDEX_WIDTH 7 /* WSEQ_CURRENT_INDEX - [10:4] */ +#define WM9081_WSEQ_BUSY 0x0001 /* WSEQ_BUSY */ +#define WM9081_WSEQ_BUSY_MASK 0x0001 /* WSEQ_BUSY */ +#define WM9081_WSEQ_BUSY_SHIFT 0 /* WSEQ_BUSY */ +#define WM9081_WSEQ_BUSY_WIDTH 1 /* WSEQ_BUSY */ + +/* + * R40 (0x28) - MW Slave 1 + */ +#define WM9081_SPI_CFG 0x0020 /* SPI_CFG */ +#define WM9081_SPI_CFG_MASK 0x0020 /* SPI_CFG */ +#define WM9081_SPI_CFG_SHIFT 5 /* SPI_CFG */ +#define WM9081_SPI_CFG_WIDTH 1 /* SPI_CFG */ +#define WM9081_SPI_4WIRE 0x0010 /* SPI_4WIRE */ +#define WM9081_SPI_4WIRE_MASK 0x0010 /* SPI_4WIRE */ +#define WM9081_SPI_4WIRE_SHIFT 4 /* SPI_4WIRE */ +#define WM9081_SPI_4WIRE_WIDTH 1 /* SPI_4WIRE */ +#define WM9081_ARA_ENA 0x0008 /* ARA_ENA */ +#define WM9081_ARA_ENA_MASK 0x0008 /* ARA_ENA */ +#define WM9081_ARA_ENA_SHIFT 3 /* ARA_ENA */ +#define WM9081_ARA_ENA_WIDTH 1 /* ARA_ENA */ +#define WM9081_AUTO_INC 0x0002 /* AUTO_INC */ +#define WM9081_AUTO_INC_MASK 0x0002 /* AUTO_INC */ +#define WM9081_AUTO_INC_SHIFT 1 /* AUTO_INC */ +#define WM9081_AUTO_INC_WIDTH 1 /* AUTO_INC */ + +/* + * R42 (0x2A) - EQ 1 + */ +#define WM9081_EQ_B1_GAIN_MASK 0xF800 /* EQ_B1_GAIN - [15:11] */ +#define WM9081_EQ_B1_GAIN_SHIFT 11 /* EQ_B1_GAIN - [15:11] */ +#define WM9081_EQ_B1_GAIN_WIDTH 5 /* EQ_B1_GAIN - [15:11] */ +#define WM9081_EQ_B2_GAIN_MASK 0x07C0 /* EQ_B2_GAIN - [10:6] */ +#define WM9081_EQ_B2_GAIN_SHIFT 6 /* EQ_B2_GAIN - [10:6] */ +#define WM9081_EQ_B2_GAIN_WIDTH 5 /* EQ_B2_GAIN - [10:6] */ +#define WM9081_EQ_B4_GAIN_MASK 0x003E /* EQ_B4_GAIN - [5:1] */ +#define WM9081_EQ_B4_GAIN_SHIFT 1 /* EQ_B4_GAIN - [5:1] */ +#define WM9081_EQ_B4_GAIN_WIDTH 5 /* EQ_B4_GAIN - [5:1] */ +#define WM9081_EQ_ENA 0x0001 /* EQ_ENA */ +#define WM9081_EQ_ENA_MASK 0x0001 /* EQ_ENA */ +#define WM9081_EQ_ENA_SHIFT 0 /* EQ_ENA */ +#define WM9081_EQ_ENA_WIDTH 1 /* EQ_ENA */ + +/* + * R43 (0x2B) - EQ 2 + */ +#define WM9081_EQ_B3_GAIN_MASK 0xF800 /* EQ_B3_GAIN - [15:11] */ +#define WM9081_EQ_B3_GAIN_SHIFT 11 /* EQ_B3_GAIN - [15:11] */ +#define WM9081_EQ_B3_GAIN_WIDTH 5 /* EQ_B3_GAIN - [15:11] */ +#define WM9081_EQ_B5_GAIN_MASK 0x07C0 /* EQ_B5_GAIN - [10:6] */ +#define WM9081_EQ_B5_GAIN_SHIFT 6 /* EQ_B5_GAIN - [10:6] */ +#define WM9081_EQ_B5_GAIN_WIDTH 5 /* EQ_B5_GAIN - [10:6] */ + +/* + * R44 (0x2C) - EQ 3 + */ +#define WM9081_EQ_B1_A_MASK 0xFFFF /* EQ_B1_A - [15:0] */ +#define WM9081_EQ_B1_A_SHIFT 0 /* EQ_B1_A - [15:0] */ +#define WM9081_EQ_B1_A_WIDTH 16 /* EQ_B1_A - [15:0] */ + +/* + * R45 (0x2D) - EQ 4 + */ +#define WM9081_EQ_B1_B_MASK 0xFFFF /* EQ_B1_B - [15:0] */ +#define WM9081_EQ_B1_B_SHIFT 0 /* EQ_B1_B - [15:0] */ +#define WM9081_EQ_B1_B_WIDTH 16 /* EQ_B1_B - [15:0] */ + +/* + * R46 (0x2E) - EQ 5 + */ +#define WM9081_EQ_B1_PG_MASK 0xFFFF /* EQ_B1_PG - [15:0] */ +#define WM9081_EQ_B1_PG_SHIFT 0 /* EQ_B1_PG - [15:0] */ +#define WM9081_EQ_B1_PG_WIDTH 16 /* EQ_B1_PG - [15:0] */ + +/* + * R47 (0x2F) - EQ 6 + */ +#define WM9081_EQ_B2_A_MASK 0xFFFF /* EQ_B2_A - [15:0] */ +#define WM9081_EQ_B2_A_SHIFT 0 /* EQ_B2_A - [15:0] */ +#define WM9081_EQ_B2_A_WIDTH 16 /* EQ_B2_A - [15:0] */ + +/* + * R48 (0x30) - EQ 7 + */ +#define WM9081_EQ_B2_B_MASK 0xFFFF /* EQ_B2_B - [15:0] */ +#define WM9081_EQ_B2_B_SHIFT 0 /* EQ_B2_B - [15:0] */ +#define WM9081_EQ_B2_B_WIDTH 16 /* EQ_B2_B - [15:0] */ + +/* + * R49 (0x31) - EQ 8 + */ +#define WM9081_EQ_B2_C_MASK 0xFFFF /* EQ_B2_C - [15:0] */ +#define WM9081_EQ_B2_C_SHIFT 0 /* EQ_B2_C - [15:0] */ +#define WM9081_EQ_B2_C_WIDTH 16 /* EQ_B2_C - [15:0] */ + +/* + * R50 (0x32) - EQ 9 + */ +#define WM9081_EQ_B2_PG_MASK 0xFFFF /* EQ_B2_PG - [15:0] */ +#define WM9081_EQ_B2_PG_SHIFT 0 /* EQ_B2_PG - [15:0] */ +#define WM9081_EQ_B2_PG_WIDTH 16 /* EQ_B2_PG - [15:0] */ + +/* + * R51 (0x33) - EQ 10 + */ +#define WM9081_EQ_B4_A_MASK 0xFFFF /* EQ_B4_A - [15:0] */ +#define WM9081_EQ_B4_A_SHIFT 0 /* EQ_B4_A - [15:0] */ +#define WM9081_EQ_B4_A_WIDTH 16 /* EQ_B4_A - [15:0] */ + +/* + * R52 (0x34) - EQ 11 + */ +#define WM9081_EQ_B4_B_MASK 0xFFFF /* EQ_B4_B - [15:0] */ +#define WM9081_EQ_B4_B_SHIFT 0 /* EQ_B4_B - [15:0] */ +#define WM9081_EQ_B4_B_WIDTH 16 /* EQ_B4_B - [15:0] */ + +/* + * R53 (0x35) - EQ 12 + */ +#define WM9081_EQ_B4_C_MASK 0xFFFF /* EQ_B4_C - [15:0] */ +#define WM9081_EQ_B4_C_SHIFT 0 /* EQ_B4_C - [15:0] */ +#define WM9081_EQ_B4_C_WIDTH 16 /* EQ_B4_C - [15:0] */ + +/* + * R54 (0x36) - EQ 13 + */ +#define WM9081_EQ_B4_PG_MASK 0xFFFF /* EQ_B4_PG - [15:0] */ +#define WM9081_EQ_B4_PG_SHIFT 0 /* EQ_B4_PG - [15:0] */ +#define WM9081_EQ_B4_PG_WIDTH 16 /* EQ_B4_PG - [15:0] */ + +/* + * R55 (0x37) - EQ 14 + */ +#define WM9081_EQ_B3_A_MASK 0xFFFF /* EQ_B3_A - [15:0] */ +#define WM9081_EQ_B3_A_SHIFT 0 /* EQ_B3_A - [15:0] */ +#define WM9081_EQ_B3_A_WIDTH 16 /* EQ_B3_A - [15:0] */ + +/* + * R56 (0x38) - EQ 15 + */ +#define WM9081_EQ_B3_B_MASK 0xFFFF /* EQ_B3_B - [15:0] */ +#define WM9081_EQ_B3_B_SHIFT 0 /* EQ_B3_B - [15:0] */ +#define WM9081_EQ_B3_B_WIDTH 16 /* EQ_B3_B - [15:0] */ + +/* + * R57 (0x39) - EQ 16 + */ +#define WM9081_EQ_B3_C_MASK 0xFFFF /* EQ_B3_C - [15:0] */ +#define WM9081_EQ_B3_C_SHIFT 0 /* EQ_B3_C - [15:0] */ +#define WM9081_EQ_B3_C_WIDTH 16 /* EQ_B3_C - [15:0] */ + +/* + * R58 (0x3A) - EQ 17 + */ +#define WM9081_EQ_B3_PG_MASK 0xFFFF /* EQ_B3_PG - [15:0] */ +#define WM9081_EQ_B3_PG_SHIFT 0 /* EQ_B3_PG - [15:0] */ +#define WM9081_EQ_B3_PG_WIDTH 16 /* EQ_B3_PG - [15:0] */ + +/* + * R59 (0x3B) - EQ 18 + */ +#define WM9081_EQ_B5_A_MASK 0xFFFF /* EQ_B5_A - [15:0] */ +#define WM9081_EQ_B5_A_SHIFT 0 /* EQ_B5_A - [15:0] */ +#define WM9081_EQ_B5_A_WIDTH 16 /* EQ_B5_A - [15:0] */ + +/* + * R60 (0x3C) - EQ 19 + */ +#define WM9081_EQ_B5_B_MASK 0xFFFF /* EQ_B5_B - [15:0] */ +#define WM9081_EQ_B5_B_SHIFT 0 /* EQ_B5_B - [15:0] */ +#define WM9081_EQ_B5_B_WIDTH 16 /* EQ_B5_B - [15:0] */ + +/* + * R61 (0x3D) - EQ 20 + */ +#define WM9081_EQ_B5_PG_MASK 0xFFFF /* EQ_B5_PG - [15:0] */ +#define WM9081_EQ_B5_PG_SHIFT 0 /* EQ_B5_PG - [15:0] */ +#define WM9081_EQ_B5_PG_WIDTH 16 /* EQ_B5_PG - [15:0] */ + + +#endif -- cgit v1.2.3-70-g09d2 From e31a16d6f64ef0e324c6f54d5112703c3f13a9c4 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 21 May 2009 21:47:03 +0800 Subject: wireless: move some utility functions from mac80211 to cfg80211 The patch moves some utility functions from mac80211 to cfg80211. Because these functions are doing generic 802.11 operations so they are not mac80211 specific. The moving allows some fullmac drivers to be also benefit from these utility functions. Signed-off-by: Zhu Yi Signed-off-by: Samuel Ortiz Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ar9170/main.c | 2 +- drivers/net/wireless/ath/ath5k/pcu.c | 4 +- drivers/net/wireless/ath/ath9k/hw.c | 8 +- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 2 +- include/linux/ieee80211.h | 9 + include/net/cfg80211.h | 47 +++++ include/net/mac80211.h | 28 --- net/mac80211/ieee80211_i.h | 2 - net/mac80211/mesh.h | 4 - net/mac80211/rx.c | 89 +-------- net/mac80211/util.c | 73 ------- net/mac80211/wme.c | 30 +-- net/wireless/util.c | 305 +++++++++++++++++++++++++++++ 14 files changed, 375 insertions(+), 230 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 4ef1d2fc859..99df9ddae9c 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1555,7 +1555,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, switch (key->alg) { case ALG_WEP: - if (key->keylen == LEN_WEP40) + if (key->keylen == WLAN_KEY_LEN_WEP40) ktype = AR9170_ENC_ALG_WEP64; else ktype = AR9170_ENC_ALG_WEP128; diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 579aa0a96ab..ec35503f6a4 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -1038,9 +1038,9 @@ int ath5k_keycache_type(const struct ieee80211_key_conf *key) case ALG_CCMP: return AR5K_KEYTABLE_TYPE_CCM; case ALG_WEP: - if (key->keylen == LEN_WEP40) + if (key->keylen == WLAN_KEY_LEN_WEP40) return AR5K_KEYTABLE_TYPE_40; - else if (key->keylen == LEN_WEP104) + else if (key->keylen == WLAN_KEY_LEN_WEP104) return AR5K_KEYTABLE_TYPE_104; return -EINVAL; default: diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 4acfab51491..1579c9407ed 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2472,14 +2472,14 @@ bool ath9k_hw_set_keycache_entry(struct ath_hw *ah, u16 entry, } break; case ATH9K_CIPHER_WEP: - if (k->kv_len < LEN_WEP40) { + if (k->kv_len < WLAN_KEY_LEN_WEP40) { DPRINTF(ah->ah_sc, ATH_DBG_ANY, "WEP key length %u too small\n", k->kv_len); return false; } - if (k->kv_len <= LEN_WEP40) + if (k->kv_len <= WLAN_KEY_LEN_WEP40) keyType = AR_KEYTABLE_TYPE_40; - else if (k->kv_len <= LEN_WEP104) + else if (k->kv_len <= WLAN_KEY_LEN_WEP104) keyType = AR_KEYTABLE_TYPE_104; else keyType = AR_KEYTABLE_TYPE_128; @@ -2498,7 +2498,7 @@ bool ath9k_hw_set_keycache_entry(struct ath_hw *ah, u16 entry, key2 = get_unaligned_le32(k->kv_val + 6); key3 = get_unaligned_le16(k->kv_val + 10); key4 = get_unaligned_le32(k->kv_val + 12); - if (k->kv_len <= LEN_WEP104) + if (k->kv_len <= WLAN_KEY_LEN_WEP104) key4 &= 0xff; /* diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index ec8516eadc4..cb4a8712946 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3637,7 +3637,7 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, err = -EINVAL; switch (key->alg) { case ALG_WEP: - if (key->keylen == LEN_WEP40) + if (key->keylen == WLAN_KEY_LEN_WEP40) algorithm = B43_SEC_ALGO_WEP40; else algorithm = B43_SEC_ALGO_WEP104; diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 57ab42cfed3..bc4e81e2184 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -33,7 +33,7 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key) { switch (key->alg) { case ALG_WEP: - if (key->keylen == LEN_WEP40) + if (key->keylen == WLAN_KEY_LEN_WEP40) return CIPHER_WEP64; else return CIPHER_WEP128; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 05c29c01174..34de8b21f6d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -493,6 +493,7 @@ struct ieee80211s_hdr { /* Mesh flags */ #define MESH_FLAGS_AE_A4 0x1 #define MESH_FLAGS_AE_A5_A6 0x2 +#define MESH_FLAGS_AE 0x3 #define MESH_FLAGS_PS_DEEP 0x4 /** @@ -1085,6 +1086,14 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* Security key length */ +enum ieee80211_key_len { + WLAN_KEY_LEN_WEP40 = 5, + WLAN_KEY_LEN_WEP104 = 13, + WLAN_KEY_LEN_CCMP = 16, + WLAN_KEY_LEN_TKIP = 32, +}; + /* * IEEE 802.11-2007 7.3.2.9 Country information element * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 389f1d20adf..f20da7d63b1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1244,6 +1244,53 @@ extern int ieee80211_radiotap_iterator_init( extern int ieee80211_radiotap_iterator_next( struct ieee80211_radiotap_iterator *iterator); +extern const unsigned char rfc1042_header[6]; +extern const unsigned char bridge_tunnel_header[6]; + +/** + * ieee80211_get_hdrlen_from_skb - get header length from data + * + * Given an skb with a raw 802.11 header at the data pointer this function + * returns the 802.11 header length in bytes (not including encryption + * headers). If the data in the sk_buff is too short to contain a valid 802.11 + * header the function returns 0. + * + * @skb: the frame + */ +unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); + +/** + * ieee80211_hdrlen - get header length in bytes from frame control + * @fc: frame control field in little-endian format + */ +unsigned int ieee80211_hdrlen(__le16 fc); + +/** + * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 + * @skb: the 802.11 data frame + * @addr: the device MAC address + * @iftype: the virtual interface type + */ +int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, + enum nl80211_iftype iftype); + +/** + * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 + * @skb: the 802.3 frame + * @addr: the device MAC address + * @iftype: the virtual interface type + * @bssid: the network bssid (used only for iftype STATION and ADHOC) + * @qos: build 802.11 QoS data frame + */ +int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, + enum nl80211_iftype iftype, u8 *bssid, bool qos); + +/** + * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame + * @skb: the data frame + */ +unsigned int cfg80211_classify8021d(struct sk_buff *skb); + /* * Regulatory helper functions for wiphys */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2d0610581ef..d72346ff324 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -671,16 +671,6 @@ enum ieee80211_key_alg { ALG_AES_CMAC, }; -/** - * enum ieee80211_key_len - key length - * @LEN_WEP40: WEP 5-byte long key - * @LEN_WEP104: WEP 13-byte long key - */ -enum ieee80211_key_len { - LEN_WEP40 = 5, - LEN_WEP104 = 13, -}; - /** * enum ieee80211_key_flags - key flags * @@ -1811,24 +1801,6 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct sk_buff * ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif); -/** - * ieee80211_get_hdrlen_from_skb - get header length from data - * - * Given an skb with a raw 802.11 header at the data pointer this function - * returns the 802.11 header length in bytes (not including encryption - * headers). If the data in the sk_buff is too short to contain a valid 802.11 - * header the function returns 0. - * - * @skb: the frame - */ -unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); - -/** - * ieee80211_hdrlen - get header length in bytes from frame control - * @fc: frame control field in little-endian format - */ -unsigned int ieee80211_hdrlen(__le16 fc); - /** * ieee80211_get_tkip_key - get a TKIP rc4 for skb * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8db8d16d206..c088c46704a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1085,8 +1085,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ -extern const unsigned char rfc1042_header[6]; -extern const unsigned char bridge_tunnel_header[6]; u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, enum nl80211_iftype type); int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 832bb503ca9..c7d72819cdd 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -191,12 +191,8 @@ struct mesh_rmc { #define PLINK_CATEGORY 30 #define MESH_PATH_SEL_CATEGORY 32 -/* Mesh Header Flags */ -#define IEEE80211S_FLAGS_AE 0x3 - /* Public interfaces */ /* Various */ -int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata); int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f3a041cc5dc..6a9b8e63a6b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1247,93 +1247,12 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) } static int -ieee80211_data_to_8023(struct ieee80211_rx_data *rx) +__ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 hdrlen, ethertype; - u8 *payload; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN] __aligned(2); - struct sk_buff *skb = rx->skb; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) - return -1; - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - - /* convert IEEE 802.11 header + possible LLC headers into Ethernet - * header - * IEEE 802.11 address fields: - * ToDS FromDS Addr1 Addr2 Addr3 Addr4 - * 0 0 DA SA BSSID n/a - * 0 1 DA BSSID SA n/a - * 1 0 BSSID SA DA n/a - * 1 1 RA TA DA SA - */ - memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); - memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); - - switch (hdr->frame_control & - cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case cpu_to_le16(IEEE80211_FCTL_TODS): - if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) - return -1; - break; - case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) - return -1; - if (ieee80211_vif_is_mesh(&sdata->vif)) { - struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) - (skb->data + hdrlen); - hdrlen += ieee80211_get_mesh_hdrlen(meshdr); - if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { - memcpy(dst, meshdr->eaddr1, ETH_ALEN); - memcpy(src, meshdr->eaddr2, ETH_ALEN); - } - } - break; - case cpu_to_le16(IEEE80211_FCTL_FROMDS): - if (sdata->vif.type != NL80211_IFTYPE_STATION || - (is_multicast_ether_addr(dst) && - !compare_ether_addr(src, dev->dev_addr))) - return -1; - break; - case cpu_to_le16(0): - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) - return -1; - break; - } - - if (unlikely(skb->len - hdrlen < 8)) - return -1; - - payload = skb->data + hdrlen; - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * replace EtherType */ - skb_pull(skb, hdrlen + 6); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - struct ethhdr *ehdr; - __be16 len; - - skb_pull(skb, hdrlen); - len = htons(skb->len); - ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); - memcpy(ehdr->h_dest, dst, ETH_ALEN); - memcpy(ehdr->h_source, src, ETH_ALEN); - ehdr->h_proto = len; - } - return 0; + return ieee80211_data_to_8023(rx->skb, dev->dev_addr, sdata->vif.type); } /* @@ -1472,7 +1391,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - err = ieee80211_data_to_8023(rx); + err = __ieee80211_data_to_8023(rx); if (unlikely(err)) return RX_DROP_UNUSABLE; @@ -1658,7 +1577,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; - err = ieee80211_data_to_8023(rx); + err = __ieee80211_data_to_8023(rx); if (unlikely(err)) return RX_DROP_UNUSABLE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ffb6e88f2ec..949d857debd 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -35,15 +35,6 @@ /* privid for wiphys to determine whether they belong to us or not */ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -const unsigned char rfc1042_header[] __aligned(2) = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -const unsigned char bridge_tunnel_header[] __aligned(2) = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; - struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy) { struct ieee80211_local *local; @@ -103,70 +94,6 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, return NULL; } -unsigned int ieee80211_hdrlen(__le16 fc) -{ - unsigned int hdrlen = 24; - - if (ieee80211_is_data(fc)) { - if (ieee80211_has_a4(fc)) - hdrlen = 30; - if (ieee80211_is_data_qos(fc)) - hdrlen += IEEE80211_QOS_CTL_LEN; - goto out; - } - - if (ieee80211_is_ctl(fc)) { - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0)) - hdrlen = 10; - else - hdrlen = 16; - } -out: - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_hdrlen); - -unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data; - unsigned int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (unlikely(hdrlen > skb->len)) - return 0; - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); - -int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) -{ - int ae = meshhdr->flags & IEEE80211S_FLAGS_AE; - /* 7.1.3.5a.2 */ - switch (ae) { - case 0: - return 6; - case 1: - return 12; - case 2: - return 18; - case 3: - return 24; - default: - return 6; - } -} - void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 45b74f38b86..694343b9102 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -23,34 +23,6 @@ */ const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; -static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0}; - -/* Given a data frame determine the 802.1p/1d tag to use. */ -static unsigned int classify_1d(struct sk_buff *skb) -{ - unsigned int dscp; - - /* skb->priority values from 256->263 are magic values to - * directly indicate a specific 802.1d priority. This is used - * to allow 802.1d priority to be passed directly in from VLAN - * tags, etc. - */ - if (skb->priority >= 256 && skb->priority <= 263) - return skb->priority - 256; - - switch (skb->protocol) { - case htons(ETH_P_IP): - dscp = ip_hdr(skb)->tos & 0xfc; - break; - - default: - return 0; - } - - return dscp >> 5; -} - - static int wme_downgrade_ac(struct sk_buff *skb) { switch (skb->priority) { @@ -94,7 +66,7 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) /* use the data classifier to determine what 802.1d tag the * data frame has */ - skb->priority = classify_1d(skb); + skb->priority = cfg80211_classify8021d(skb); /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { diff --git a/net/wireless/util.c b/net/wireless/util.c index b94c8604ad7..d072bff463a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -4,7 +4,9 @@ * Copyright 2007-2009 Johannes Berg */ #include +#include #include +#include #include "core.h" struct ieee80211_rate * @@ -198,3 +200,306 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, return 0; } + +/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ +/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ +const unsigned char rfc1042_header[] __aligned(2) = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; +EXPORT_SYMBOL(rfc1042_header); + +/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ +const unsigned char bridge_tunnel_header[] __aligned(2) = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; +EXPORT_SYMBOL(bridge_tunnel_header); + +unsigned int ieee80211_hdrlen(__le16 fc) +{ + unsigned int hdrlen = 24; + + if (ieee80211_is_data(fc)) { + if (ieee80211_has_a4(fc)) + hdrlen = 30; + if (ieee80211_is_data_qos(fc)) + hdrlen += IEEE80211_QOS_CTL_LEN; + goto out; + } + + if (ieee80211_is_ctl(fc)) { + /* + * ACK and CTS are 10 bytes, all others 16. To see how + * to get this condition consider + * subtype mask: 0b0000000011110000 (0x00F0) + * ACK subtype: 0b0000000011010000 (0x00D0) + * CTS subtype: 0b0000000011000000 (0x00C0) + * bits that matter: ^^^ (0x00E0) + * value of those: 0b0000000011000000 (0x00C0) + */ + if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0)) + hdrlen = 10; + else + hdrlen = 16; + } +out: + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_hdrlen); + +unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = + (const struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (unlikely(hdrlen > skb->len)) + return 0; + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); + +int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) +{ + int ae = meshhdr->flags & MESH_FLAGS_AE; + /* 7.1.3.5a.2 */ + switch (ae) { + case 0: + return 6; + case 1: + return 12; + case 2: + return 18; + case 3: + return 24; + default: + return 6; + } +} + +int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, + enum nl80211_iftype iftype) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u16 hdrlen, ethertype; + u8 *payload; + u8 dst[ETH_ALEN]; + u8 src[ETH_ALEN] __aligned(2); + + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) + return -1; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* convert IEEE 802.11 header + possible LLC headers into Ethernet + * header + * IEEE 802.11 address fields: + * ToDS FromDS Addr1 Addr2 Addr3 Addr4 + * 0 0 DA SA BSSID n/a + * 0 1 DA BSSID SA n/a + * 1 0 BSSID SA DA n/a + * 1 1 RA TA DA SA + */ + memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); + memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); + + switch (hdr->frame_control & + cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case cpu_to_le16(IEEE80211_FCTL_TODS): + if (unlikely(iftype != NL80211_IFTYPE_AP && + iftype != NL80211_IFTYPE_AP_VLAN)) + return -1; + break; + case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + if (unlikely(iftype != NL80211_IFTYPE_WDS && + iftype != NL80211_IFTYPE_MESH_POINT)) + return -1; + if (iftype == NL80211_IFTYPE_MESH_POINT) { + struct ieee80211s_hdr *meshdr = + (struct ieee80211s_hdr *) (skb->data + hdrlen); + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { + memcpy(dst, meshdr->eaddr1, ETH_ALEN); + memcpy(src, meshdr->eaddr2, ETH_ALEN); + } + } + break; + case cpu_to_le16(IEEE80211_FCTL_FROMDS): + if (iftype != NL80211_IFTYPE_STATION || + (is_multicast_ether_addr(dst) && + !compare_ether_addr(src, addr))) + return -1; + break; + case cpu_to_le16(0): + if (iftype != NL80211_IFTYPE_ADHOC) + return -1; + break; + } + + if (unlikely(skb->len - hdrlen < 8)) + return -1; + + payload = skb->data + hdrlen; + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel encapsulation and + * replace EtherType */ + skb_pull(skb, hdrlen + 6); + memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); + } else { + struct ethhdr *ehdr; + __be16 len; + + skb_pull(skb, hdrlen); + len = htons(skb->len); + ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); + memcpy(ehdr->h_dest, dst, ETH_ALEN); + memcpy(ehdr->h_source, src, ETH_ALEN); + ehdr->h_proto = len; + } + return 0; +} +EXPORT_SYMBOL(ieee80211_data_to_8023); + +int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, + enum nl80211_iftype iftype, u8 *bssid, bool qos) +{ + struct ieee80211_hdr hdr; + u16 hdrlen, ethertype; + __le16 fc; + const u8 *encaps_data; + int encaps_len, skip_header_bytes; + int nh_pos, h_pos; + int head_need; + + if (unlikely(skb->len < ETH_HLEN)) + return -EINVAL; + + nh_pos = skb_network_header(skb) - skb->data; + h_pos = skb_transport_header(skb) - skb->data; + + /* convert Ethernet header to proper 802.11 header (based on + * operation mode) */ + ethertype = (skb->data[12] << 8) | skb->data[13]; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 24; + break; + case NL80211_IFTYPE_STATION: + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + memcpy(hdr.addr1, bssid, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + hdrlen = 24; + break; + case NL80211_IFTYPE_ADHOC: + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, bssid, ETH_ALEN); + hdrlen = 24; + break; + default: + return -EOPNOTSUPP; + } + + if (qos) { + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + hdrlen += 2; + } + + hdr.frame_control = fc; + hdr.duration_id = 0; + hdr.seq_ctrl = 0; + + skip_header_bytes = ETH_HLEN; + if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { + encaps_data = bridge_tunnel_header; + encaps_len = sizeof(bridge_tunnel_header); + skip_header_bytes -= 2; + } else if (ethertype > 0x600) { + encaps_data = rfc1042_header; + encaps_len = sizeof(rfc1042_header); + skip_header_bytes -= 2; + } else { + encaps_data = NULL; + encaps_len = 0; + } + + skb_pull(skb, skip_header_bytes); + nh_pos -= skip_header_bytes; + h_pos -= skip_header_bytes; + + head_need = hdrlen + encaps_len - skb_headroom(skb); + + if (head_need > 0 || skb_cloned(skb)) { + head_need = max(head_need, 0); + if (head_need) + skb_orphan(skb); + + if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) { + printk(KERN_ERR "failed to reallocate Tx buffer\n"); + return -ENOMEM; + } + skb->truesize += head_need; + } + + if (encaps_data) { + memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); + nh_pos += encaps_len; + h_pos += encaps_len; + } + + memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); + + nh_pos += hdrlen; + h_pos += hdrlen; + + /* Update skb pointers to various headers since this modified frame + * is going to go through Linux networking code that may potentially + * need things like pointer to IP header. */ + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, nh_pos); + skb_set_transport_header(skb, h_pos); + + return 0; +} +EXPORT_SYMBOL(ieee80211_data_from_8023); + +/* Given a data frame determine the 802.1p/1d tag to use. */ +unsigned int cfg80211_classify8021d(struct sk_buff *skb) +{ + unsigned int dscp; + + /* skb->priority values from 256->263 are magic values to + * directly indicate a specific 802.1d priority. This is used + * to allow 802.1d priority to be passed directly in from VLAN + * tags, etc. + */ + if (skb->priority >= 256 && skb->priority <= 263) + return skb->priority - 256; + + switch (skb->protocol) { + case htons(ETH_P_IP): + dscp = ip_hdr(skb)->tos & 0xfc; + break; + default: + return 0; + } + + return dscp >> 5; +} +EXPORT_SYMBOL(cfg80211_classify8021d); -- cgit v1.2.3-70-g09d2 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 2 +- block/blk-integrity.c | 2 +- block/blk-settings.c | 21 ++++++++++----------- block/blk-sysfs.c | 12 +++++++++--- block/compat_ioctl.c | 2 +- block/ioctl.c | 2 +- drivers/block/cciss.c | 6 +++--- drivers/block/cpqarray.c | 4 ++-- drivers/block/hd.c | 2 +- drivers/block/mg_disk.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/ub.c | 6 +++--- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/block/xsysace.c | 2 +- drivers/cdrom/gdrom.c | 2 +- drivers/cdrom/viocd.c | 4 ++-- drivers/char/raw.c | 2 +- drivers/ide/ide-cd.c | 12 ++++++------ drivers/md/bitmap.c | 4 ++-- drivers/md/dm-exception-store.c | 2 +- drivers/md/dm-log.c | 3 ++- drivers/md/dm-snap-persistent.c | 2 +- drivers/md/dm-table.c | 12 +++++++----- drivers/md/md.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/message/i2o/i2o_block.c | 5 +++-- drivers/mmc/card/block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/xpram.c | 2 +- drivers/s390/char/tape_block.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- fs/bio.c | 3 ++- fs/block_dev.c | 6 +++--- fs/buffer.c | 6 +++--- fs/direct-io.c | 2 +- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 2 +- fs/gfs2/ops_fstype.c | 4 ++-- fs/gfs2/rgrp.c | 2 +- fs/nilfs2/the_nilfs.c | 2 +- fs/ntfs/super.c | 6 +++--- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/super.c | 2 +- fs/partitions/ibm.c | 2 +- fs/partitions/msdos.c | 4 ++-- fs/udf/super.c | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 2 +- include/linux/blkdev.h | 14 +++++++------- include/linux/device-mapper.h | 2 +- 54 files changed, 108 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 9e105cbc5e5..a4779912a5c 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -250,7 +250,7 @@ axon_ram_probe(struct of_device *device, const struct of_device_id *device_id) set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); blk_queue_make_request(bank->disk->queue, axon_ram_make_request); - blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); + blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); add_disk(bank->disk); bank->irq_id = irq_of_parse_and_map(device->node, 0); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 91fa8e06b6a..73e28d35568 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -340,7 +340,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) kobject_uevent(&bi->kobj, KOBJ_ADD); bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; - bi->sector_size = disk->queue->hardsect_size; + bi->sector_size = queue_logical_block_size(disk->queue); disk->integrity = bi; } else bi = disk->integrity; diff --git a/block/blk-settings.c b/block/blk-settings.c index 57af728d94b..15c3164537b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -134,7 +134,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->backing_dev_info.state = 0; q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; blk_queue_max_sectors(q, SAFE_MAX_SECTORS); - blk_queue_hardsect_size(q, 512); + blk_queue_logical_block_size(q, 512); blk_queue_dma_alignment(q, 511); blk_queue_congestion_threshold(q); q->nr_batching = BLK_BATCH_REQ; @@ -288,21 +288,20 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) EXPORT_SYMBOL(blk_queue_max_segment_size); /** - * blk_queue_hardsect_size - set hardware sector size for the queue + * blk_queue_logical_block_size - set logical block size for the queue * @q: the request queue for the device - * @size: the hardware sector size, in bytes + * @size: the logical block size, in bytes * * Description: - * This should typically be set to the lowest possible sector size - * that the hardware can operate on (possible without reverting to - * even internal read-modify-write operations). Usually the default - * of 512 covers most hardware. + * This should be set to the lowest possible block size that the + * storage device can address. The default of 512 covers most + * hardware. **/ -void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) +void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) { - q->hardsect_size = size; + q->logical_block_size = size; } -EXPORT_SYMBOL(blk_queue_hardsect_size); +EXPORT_SYMBOL(blk_queue_logical_block_size); /* * Returns the minimum that is _not_ zero, unless both are zero. @@ -324,7 +323,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments); t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - t->hardsect_size = max(t->hardsect_size, b->hardsect_size); + t->logical_block_size = max(t->logical_block_size, b->logical_block_size); if (!t->queue_lock) WARN_ON_ONCE(1); else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ff9bba3379..13d38b7e4d0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -100,9 +100,9 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_sectors_kb, (page)); } -static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page) +static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) { - return queue_var_show(q->hardsect_size, page); + return queue_var_show(queue_logical_block_size(q), page); } static ssize_t @@ -249,7 +249,12 @@ static struct queue_sysfs_entry queue_iosched_entry = { static struct queue_sysfs_entry queue_hw_sector_size_entry = { .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, - .show = queue_hw_sector_size_show, + .show = queue_logical_block_size_show, +}; + +static struct queue_sysfs_entry queue_logical_block_size_entry = { + .attr = {.name = "logical_block_size", .mode = S_IRUGO }, + .show = queue_logical_block_size_show, }; static struct queue_sysfs_entry queue_nonrot_entry = { @@ -283,6 +288,7 @@ static struct attribute *default_attrs[] = { &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, + &queue_logical_block_size_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46..9eaa1940273 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -763,7 +763,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ return compat_put_int(arg, block_size(bdev)); case BLKSSZGET: /* get block device hardware sector size */ - return compat_put_int(arg, bdev_hardsect_size(bdev)); + return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: return compat_put_ushort(arg, bdev_get_queue(bdev)->max_sectors); diff --git a/block/ioctl.c b/block/ioctl.c index ad474d4bbcc..7aa97f65da8 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -311,7 +311,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ return put_int(arg, block_size(bdev)); case BLKSSZGET: /* get block device hardware sector size */ - return put_int(arg, bdev_hardsect_size(bdev)); + return put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: return put_ushort(arg, bdev_get_queue(bdev)->max_sectors); case BLKRASET: diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e714e7cce6f..94474f5f8bc 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1389,8 +1389,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->queue->queuedata = h; - blk_queue_hardsect_size(disk->queue, - h->drv[drv_index].block_size); + blk_queue_logical_block_size(disk->queue, + h->drv[drv_index].block_size); /* Make sure all queue data is written out before */ /* setting h->drv[drv_index].queue, as setting this */ @@ -2298,7 +2298,7 @@ static int cciss_revalidate(struct gendisk *disk) cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size, inq_buff, drv); - blk_queue_hardsect_size(drv->queue, drv->block_size); + blk_queue_logical_block_size(drv->queue, drv->block_size); set_capacity(disk, drv->nr_blocks); kfree(inq_buff); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index a02dcfc00f1..44fa2018f6b 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -474,7 +474,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) disk->fops = &ida_fops; if (j && !drv->nr_blks) continue; - blk_queue_hardsect_size(hba[i]->queue, drv->blk_size); + blk_queue_logical_block_size(hba[i]->queue, drv->blk_size); set_capacity(disk, drv->nr_blks); disk->queue = hba[i]->queue; disk->private_data = drv; @@ -1546,7 +1546,7 @@ static int revalidate_allvol(ctlr_info_t *host) drv_info_t *drv = &host->drv[i]; if (i && !drv->nr_blks) continue; - blk_queue_hardsect_size(host->queue, drv->blk_size); + blk_queue_logical_block_size(host->queue, drv->blk_size); set_capacity(disk, drv->nr_blks); disk->queue = host->queue; disk->private_data = drv; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 961de56d00a..f65b3f369eb 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -724,7 +724,7 @@ static int __init hd_init(void) blk_queue_max_sectors(hd_queue, 255); init_timer(&device_timer); device_timer.function = hd_times_out; - blk_queue_hardsect_size(hd_queue, 512); + blk_queue_logical_block_size(hd_queue, 512); if (!NR_HD) { /* diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index c0cd0a03f69..60de5a01e71 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -996,7 +996,7 @@ static int mg_probe(struct platform_device *plat_dev) goto probe_err_6; } blk_queue_max_sectors(host->breq, MG_MAX_SECTS); - blk_queue_hardsect_size(host->breq, MG_SECTOR_SIZE); + blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE); init_timer(&host->timer); host->timer.function = mg_times_out; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index dc7a8c352da..293f5858921 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2657,7 +2657,7 @@ static void pkt_init_queue(struct pktcdvd_device *pd) struct request_queue *q = pd->disk->queue; blk_queue_make_request(q, pkt_make_request); - blk_queue_hardsect_size(q, CD_FRAMESIZE); + blk_queue_logical_block_size(q, CD_FRAMESIZE); blk_queue_max_sectors(q, PACKET_MAX_SECTORS); blk_queue_merge_bvec(q, pkt_merge_bvec); q->queuedata = pd; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 338cee4cc0b..aaeeb544228 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -477,7 +477,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_max_sectors(queue, dev->bounce_size >> 9); blk_queue_segment_boundary(queue, -1UL); blk_queue_dma_alignment(queue, dev->blk_size-1); - blk_queue_hardsect_size(queue, dev->blk_size); + blk_queue_logical_block_size(queue, dev->blk_size); blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH, ps3disk_prepare_flush); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index e67bbae9547..cc54473b8e7 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -722,7 +722,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, /* * build the command * - * The call to blk_queue_hardsect_size() guarantees that request + * The call to blk_queue_logical_block_size() guarantees that request * is aligned, but it is given in terms of 512 byte units, always. */ block = blk_rq_pos(rq) >> lun->capacity.bshift; @@ -1749,7 +1749,7 @@ static int ub_bd_revalidate(struct gendisk *disk) ub_revalidate(lun->udev, lun); /* XXX Support sector size switching like in sr.c */ - blk_queue_hardsect_size(disk->queue, lun->capacity.bsize); + blk_queue_logical_block_size(disk->queue, lun->capacity.bsize); set_capacity(disk, lun->capacity.nsec); // set_disk_ro(sdkp->disk, lun->readonly); @@ -2324,7 +2324,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum) blk_queue_max_phys_segments(q, UB_MAX_REQ_SG); blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */ blk_queue_max_sectors(q, UB_MAX_SECTORS); - blk_queue_hardsect_size(q, lun->capacity.bsize); + blk_queue_logical_block_size(q, lun->capacity.bsize); lun->disk = disk; q->queuedata = lun; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 511d4ae2d17..c4845b16946 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -347,7 +347,7 @@ static int virtblk_probe(struct virtio_device *vdev) offsetof(struct virtio_blk_config, blk_size), &blk_size); if (!err) - blk_queue_hardsect_size(vblk->disk->queue, blk_size); + blk_queue_logical_block_size(vblk->disk->queue, blk_size); add_disk(vblk->disk); return 0; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 132120ae4bd..c1996829d5e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -344,7 +344,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_hardsect_size(rq, sector_size); + blk_queue_logical_block_size(rq, sector_size); blk_queue_max_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 3a4397edab7..f08491a3a81 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -984,7 +984,7 @@ static int __devinit ace_setup(struct ace_device *ace) ace->queue = blk_init_queue(ace_request, &ace->lock); if (ace->queue == NULL) goto err_blk_initq; - blk_queue_hardsect_size(ace->queue, 512); + blk_queue_logical_block_size(ace->queue, 512); /* * Allocate and initialize GD structure diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1e366ad8f68..b5621f27c4b 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -739,7 +739,7 @@ static void __devinit probe_gdrom_setupdisk(void) static int __devinit probe_gdrom_setupqueue(void) { - blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR); + blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR); /* using DMA so memory will need to be contiguous */ blk_queue_max_hw_segments(gd.gdrom_rq, 1); /* set a large max size to get most from DMA */ diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index f177c2d4017..0fff646cc2f 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -469,8 +469,8 @@ static void vio_handle_cd_event(struct HvLpEvent *event) case viocdopen: if (event->xRc == 0) { di = &viocd_diskinfo[bevent->disk]; - blk_queue_hardsect_size(di->viocd_disk->queue, - bevent->block_size); + blk_queue_logical_block_size(di->viocd_disk->queue, + bevent->block_size); set_capacity(di->viocd_disk, bevent->media_size * bevent->block_size / 512); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 20d90e6a6e5..db32f0e4c7d 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp) err = bd_claim(bdev, raw_open); if (err) goto out1; - err = set_blocksize(bdev, bdev_hardsect_size(bdev)); + err = set_blocksize(bdev, bdev_logical_block_size(bdev)); if (err) goto out2; filp->f_flags |= O_DIRECT; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 1799328decf..424140c6c40 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -182,7 +182,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, (sense->information[2] << 8) | (sense->information[3]); - if (drive->queue->hardsect_size == 2048) + if (queue_logical_block_size(drive->queue) == 2048) /* device sector size is 2K */ sector <<= 2; @@ -737,7 +737,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) struct request_queue *q = drive->queue; int write = rq_data_dir(rq) == WRITE; unsigned short sectors_per_frame = - queue_hardsect_size(q) >> SECTOR_BITS; + queue_logical_block_size(q) >> SECTOR_BITS; ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, " "secs_per_frame: %u", @@ -1021,8 +1021,8 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense) /* save a private copy of the TOC capacity for error handling */ drive->probed_capacity = toc->capacity * sectors_per_frame; - blk_queue_hardsect_size(drive->queue, - sectors_per_frame << SECTOR_BITS); + blk_queue_logical_block_size(drive->queue, + sectors_per_frame << SECTOR_BITS); /* first read just the header, so we know how long the TOC is */ stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, @@ -1338,7 +1338,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) /* standard prep_rq_fn that builds 10 byte cmds */ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { - int hard_sect = queue_hardsect_size(q); + int hard_sect = queue_logical_block_size(q); long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); @@ -1543,7 +1543,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) nslots = ide_cdrom_probe_capabilities(drive); - blk_queue_hardsect_size(q, CD_FRAMESIZE); + blk_queue_logical_block_size(q, CD_FRAMESIZE); if (ide_cdrom_register(drive, nslots)) { printk(KERN_ERR PFX "%s: %s failed to register device with the" diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 47c68bc75a1..06b0ded1ce2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, target = rdev->sb_start + offset + index * (PAGE_SIZE/512); if (sync_page_io(rdev->bdev, target, - roundup(size, bdev_hardsect_size(rdev->bdev)), + roundup(size, bdev_logical_block_size(rdev->bdev)), page, READ)) { page->index = index; attach_page_buffers(page, NULL); /* so that free_buffer will @@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) int size = PAGE_SIZE; if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, - bdev_hardsect_size(rdev->bdev)); + bdev_logical_block_size(rdev->bdev)); /* Just make sure we aren't corrupting data or * metadata */ diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index a2e26c24214..75d8081a904 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store, } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) { + if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index be233bc4d91..6fa8ccf91c7 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, * Buffer holds both header and bitset. */ buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + - bitset_size, ti->limits.hardsect_size); + bitset_size, + ti->limits.logical_block_size); if (buf_size > dev->bdev->bd_inode->i_size) { DMWARN("log device %s too small: need %llu bytes", diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index e75c6dd76a9..2662a41337e 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) */ if (!ps->store->chunk_size) { ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->store->cow->bdev) >> 9); + bdev_logical_block_size(ps->store->cow->bdev) >> 9); ps->store->chunk_mask = ps->store->chunk_size - 1; ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1; chunk_size_supplied = 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 429b50b975d..65e2d975985 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs, lhs->max_hw_segments = min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments); - lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size); + lhs->logical_block_size = max(lhs->logical_block_size, + rhs->logical_block_size); lhs->max_segment_size = min_not_zero(lhs->max_segment_size, rhs->max_segment_size); @@ -529,7 +530,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_hw_segments = min_not_zero(rs->max_hw_segments, q->max_hw_segments); - rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size); + rs->logical_block_size = max(rs->logical_block_size, + queue_logical_block_size(q)); rs->max_segment_size = min_not_zero(rs->max_segment_size, q->max_segment_size); @@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs) rs->max_phys_segments = MAX_PHYS_SEGMENTS; if (!rs->max_hw_segments) rs->max_hw_segments = MAX_HW_SEGMENTS; - if (!rs->hardsect_size) - rs->hardsect_size = 1 << SECTOR_SHIFT; + if (!rs->logical_block_size) + rs->logical_block_size = 1 << SECTOR_SHIFT; if (!rs->max_segment_size) rs->max_segment_size = MAX_SEGMENT_SIZE; if (!rs->seg_boundary_mask) @@ -914,7 +916,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) blk_queue_max_sectors(q, t->limits.max_sectors); q->max_phys_segments = t->limits.max_phys_segments; q->max_hw_segments = t->limits.max_hw_segments; - q->hardsect_size = t->limits.hardsect_size; + q->logical_block_size = t->limits.logical_block_size; q->max_segment_size = t->limits.max_segment_size; q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; diff --git a/drivers/md/md.c b/drivers/md/md.c index fccc8343a25..4cbc19f5c30 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; - bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; + bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c0bebc6a2f2..7847bbc1440 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1242,7 +1242,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) sprintf(msb->disk->disk_name, "mspblk%d", disk_id); - blk_queue_hardsect_size(msb->queue, msb->page_size); + blk_queue_logical_block_size(msb->queue, msb->page_size); capacity = be16_to_cpu(sys_info->user_block_count); capacity *= be16_to_cpu(sys_info->block_size); diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 6573ef4408f..335d4c78a77 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -794,8 +794,9 @@ static int i2o_block_transfer(struct request *req) if (c->adaptec) { u8 cmd[10]; u32 scsi_flags; - u16 hwsec = queue_hardsect_size(req->q) >> KERNEL_SECTOR_SHIFT; + u16 hwsec; + hwsec = queue_logical_block_size(req->q) >> KERNEL_SECTOR_SHIFT; memset(cmd, 0, 10); sgl_offset = SGL_OFFSET_12; @@ -1078,7 +1079,7 @@ static int i2o_block_probe(struct device *dev) */ if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) || !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) { - blk_queue_hardsect_size(queue, le32_to_cpu(blocksize)); + blk_queue_logical_block_size(queue, le32_to_cpu(blocksize)); } else osm_warn("unable to get blocksize of %s\n", gd->disk_name); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c5df8654645..98ffc41eaf2 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -521,7 +521,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) sprintf(md->disk->disk_name, "mmcblk%d", devidx); - blk_queue_hardsect_size(md->queue.queue, 512); + blk_queue_logical_block_size(md->queue.queue, 512); if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) { /* diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 502622f628b..aaac3b6800b 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -378,7 +378,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) } tr->blkcore_priv->rq->queuedata = tr; - blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); if (tr->discard) blk_queue_set_discard(tr->blkcore_priv->rq, blktrans_discard_request); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e64f62d5e0f..27a1be0cd4d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1990,7 +1990,7 @@ static void dasd_setup_queue(struct dasd_block *block) { int max; - blk_queue_hardsect_size(block->request_queue, block->bp_block); + blk_queue_logical_block_size(block->request_queue, block->bp_block); max = block->base->discipline->max_blocks << block->s2b_shift; blk_queue_max_sectors(block->request_queue, max); blk_queue_max_phys_segments(block->request_queue, -1L); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index cfdcf1aed33..a4c7ffcd998 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -602,7 +602,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->private_data = dev_info; dev_info->gd->driverfs_dev = &dev_info->dev; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); - blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096); + blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 76814f3e898..0ae0c83ef87 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -343,7 +343,7 @@ static int __init xpram_setup_blkdev(void) goto out; } blk_queue_make_request(xpram_queues[i], xpram_make_request); - blk_queue_hardsect_size(xpram_queues[i], 4096); + blk_queue_logical_block_size(xpram_queues[i], 4096); } /* diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 1e796767598..47ff695255e 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -222,7 +222,7 @@ tapeblock_setup_device(struct tape_device * device) if (rc) goto cleanup_queue; - blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); + blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC); blk_queue_max_phys_segments(blkdat->request_queue, -1L); blk_queue_max_hw_segments(blkdat->request_queue, -1L); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 40d2860f235..bcf3bd40bbd 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1510,7 +1510,7 @@ got_data: */ sector_size = 512; } - blk_queue_hardsect_size(sdp->request_queue, sector_size); + blk_queue_logical_block_size(sdp->request_queue, sector_size); { char cap_str_2[10], cap_str_10[10]; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fddba53c7fe..cd350dfc121 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -727,7 +727,7 @@ static void get_sectorsize(struct scsi_cd *cd) } queue = cd->device->request_queue; - blk_queue_hardsect_size(queue, sector_size); + blk_queue_logical_block_size(queue, sector_size); return; } diff --git a/fs/bio.c b/fs/bio.c index 81dc93e7253..4445c382173 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1490,11 +1490,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) sector_t bio_sector_offset(struct bio *bio, unsigned short index, unsigned int offset) { - unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); + unsigned int sector_sz; struct bio_vec *bv; sector_t sectors; int i; + sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue); sectors = 0; if (index >= bio->bi_idx) diff --git a/fs/block_dev.c b/fs/block_dev.c index a85fe310fc6..a29b4dcc1bc 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -76,7 +76,7 @@ int set_blocksize(struct block_device *bdev, int size) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_hardsect_size(bdev)) + if (size < bdev_logical_block_size(bdev)) return -EINVAL; /* Don't change the size if it is same as current */ @@ -106,7 +106,7 @@ EXPORT_SYMBOL(sb_set_blocksize); int sb_min_blocksize(struct super_block *sb, int size) { - int minsize = bdev_hardsect_size(sb->s_bdev); + int minsize = bdev_logical_block_size(sb->s_bdev); if (size < minsize) size = minsize; return sb_set_blocksize(sb, size); @@ -1117,7 +1117,7 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_hardsect_size(bdev); + unsigned bsize = bdev_logical_block_size(bdev); bdev->bd_inode->i_size = size; while (bsize < PAGE_CACHE_SIZE) { diff --git a/fs/buffer.c b/fs/buffer.c index aed297739eb..36e2bbc60ec 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1085,12 +1085,12 @@ static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { /* Size must be multiple of hard sectorsize */ - if (unlikely(size & (bdev_hardsect_size(bdev)-1) || + if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { printk(KERN_ERR "getblk(): invalid block size %d requested\n", size); - printk(KERN_ERR "hardsect size: %d\n", - bdev_hardsect_size(bdev)); + printk(KERN_ERR "logical block size: %d\n", + bdev_logical_block_size(bdev)); dump_stack(); return NULL; diff --git a/fs/direct-io.c b/fs/direct-io.c index 05763bbc205..8b10b87dc01 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1127,7 +1127,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, rw = WRITE_ODIRECT; if (bdev) - bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); + bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); if (offset & blocksize_mask) { if (bdev) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 599dbfe504c..acbb94fdf90 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1696,7 +1696,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - hblock = bdev_hardsect_size(sb->s_bdev); + hblock = bdev_logical_block_size(sb->s_bdev); if (sb->s_blocksize != blocksize) { /* * Make sure the blocksize for the filesystem is larger @@ -2119,7 +2119,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, } blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); + hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { printk(KERN_ERR "EXT3-fs: blocksize too small for journal device.\n"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f22..a30549f7a30 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2962,7 +2962,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, } blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); + hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { printk(KERN_ERR "EXT4-fs: blocksize too small for journal device.\n"); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea75..a3b2ac989fc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -526,11 +526,11 @@ static int init_sb(struct gfs2_sbd *sdp, int silent) } /* Set up the buffer cache and SB for real */ - if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { + if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) { ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too small for device " "block size (%u)\n", - sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); + sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev)); goto out; } if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa..a971d24e10c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -845,7 +845,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct super_block *sb = sdp->sd_vfs; struct block_device *bdev = sb->s_bdev; const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / - bdev_hardsect_size(sb->s_bdev); + bdev_logical_block_size(sb->s_bdev); u64 blk; sector_t start = 0; sector_t nr_sects = 0; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 7f65b3be4aa..a91f15b8673 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -515,7 +515,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (sb->s_blocksize != blocksize) { - int hw_blocksize = bdev_hardsect_size(sb->s_bdev); + int hw_blocksize = bdev_logical_block_size(sb->s_bdev); if (blocksize < hw_blocksize) { printk(KERN_ERR diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index f76951dcd4a..6aa7c471353 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -25,7 +25,7 @@ #include #include #include -#include /* For bdev_hardsect_size(). */ +#include /* For bdev_logical_block_size(). */ #include #include #include @@ -2785,13 +2785,13 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) goto err_out_now; /* We support sector sizes up to the PAGE_CACHE_SIZE. */ - if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) { + if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) { if (!silent) ntfs_error(sb, "Device has unsupported sector size " "(%i). The maximum supported sector " "size on this architecture is %lu " "bytes.", - bdev_hardsect_size(sb->s_bdev), + bdev_logical_block_size(sb->s_bdev), PAGE_CACHE_SIZE); goto err_out_now; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4f85eceab37..09cc25d0461 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1371,7 +1371,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, bdevname(reg->hr_bdev, reg->hr_dev_name); - sectsize = bdev_hardsect_size(reg->hr_bdev); + sectsize = bdev_logical_block_size(reg->hr_bdev); if (sectsize != reg->hr_block_bytes) { mlog(ML_ERROR, "blocksize %u incorrect for device, expected %d", diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 79ff8d9d37e..5c6163f5503 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -713,7 +713,7 @@ static int ocfs2_sb_probe(struct super_block *sb, *bh = NULL; /* may be > 512 */ - *sector_size = bdev_hardsect_size(sb->s_bdev); + *sector_size = bdev_logical_block_size(sb->s_bdev); if (*sector_size > OCFS2_MAX_BLOCKSIZE) { mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", *sector_size, OCFS2_MAX_BLOCKSIZE); diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 46297683cd3..fc71aab0846 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -76,7 +76,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) Sector sect; res = 0; - blocksize = bdev_hardsect_size(bdev); + blocksize = bdev_logical_block_size(bdev); if (blocksize <= 0) goto out_exit; i_size = i_size_read(bdev->bd_inode); diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 796511886f2..0028d2ef066 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -110,7 +110,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, Sector sect; unsigned char *data; u32 this_sector, this_size; - int sector_size = bdev_hardsect_size(bdev) / 512; + int sector_size = bdev_logical_block_size(bdev) / 512; int loopct = 0; /* number of links followed without finding a data partition */ int i; @@ -415,7 +415,7 @@ static struct { int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) { - int sector_size = bdev_hardsect_size(bdev) / 512; + int sector_size = bdev_logical_block_size(bdev) / 512; Sector sect; unsigned char *data; struct partition *p; diff --git a/fs/udf/super.c b/fs/udf/super.c index 72348cc855a..0ba44107d8f 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1915,7 +1915,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { ret = udf_load_vrs(sb, &uopt, silent, &fileset); } else { - uopt.blocksize = bdev_hardsect_size(sb->s_bdev); + uopt.blocksize = bdev_logical_block_size(sb->s_bdev); ret = udf_load_vrs(sb, &uopt, silent, &fileset); if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { if (!silent) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e28800a9f2b..1418b916fc2 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1501,7 +1501,7 @@ xfs_setsize_buftarg_early( struct block_device *bdev) { return xfs_setsize_buftarg_flags(btp, - PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); + PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); } int diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56ce53fce72..872b78b7a10 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,7 +391,7 @@ struct request_queue unsigned int max_hw_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned int max_segment_size; unsigned long seg_boundary_mask; @@ -901,7 +901,7 @@ extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -988,19 +988,19 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline int queue_hardsect_size(struct request_queue *q) +static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->hardsect_size) - retval = q->hardsect_size; + if (q && q->logical_block_size) + retval = q->logical_block_size; return retval; } -static inline int bdev_hardsect_size(struct block_device *bdev) +static inline unsigned short bdev_logical_block_size(struct block_device *bdev) { - return queue_hardsect_size(bdev_get_queue(bdev)); + return queue_logical_block_size(bdev_get_queue(bdev)); } static inline int queue_dma_alignment(struct request_queue *q) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ded2d7c4266..49c2362977f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -149,7 +149,7 @@ struct io_restrictions { unsigned max_hw_sectors; unsigned max_sectors; unsigned max_segment_size; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; unsigned char no_cluster; /* inverted so that 0 is default */ -- cgit v1.2.3-70-g09d2 From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:50 -0400 Subject: block: Use accessor functions for queue limits Convert all external users of queue limits to using wrapper functions instead of poking the request queue variables directly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-barrier.c | 8 ++++---- block/blk-core.c | 16 ++++++++-------- block/blk-map.c | 4 ++-- block/blk-merge.c | 27 ++++++++++++++------------- block/blk-settings.c | 15 ++++++++++++--- block/blk-sysfs.c | 8 ++++---- block/compat_ioctl.c | 2 +- block/ioctl.c | 10 +++++----- block/scsi_ioctl.c | 8 ++++---- drivers/block/pktcdvd.c | 6 ++++-- drivers/cdrom/cdrom.c | 4 ++-- drivers/md/dm-table.c | 28 ++++++++++++++-------------- drivers/md/linear.c | 2 +- drivers/md/multipath.c | 4 ++-- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 8 ++++---- drivers/md/raid5.c | 4 ++-- drivers/scsi/sg.c | 15 ++++++++------- drivers/scsi/st.c | 4 ++-- drivers/usb/storage/scsiglue.c | 4 ++-- fs/bio.c | 19 ++++++++++--------- include/linux/bio.h | 2 +- include/linux/blkdev.h | 36 ++++++++++++++++++++++++++++++++++++ mm/bounce.c | 4 ++-- 25 files changed, 147 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 0d98054cdbd..30022b4e2f6 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -388,10 +388,10 @@ int blkdev_issue_discard(struct block_device *bdev, bio->bi_sector = sector; - if (nr_sects > q->max_hw_sectors) { - bio->bi_size = q->max_hw_sectors << 9; - nr_sects -= q->max_hw_sectors; - sector += q->max_hw_sectors; + if (nr_sects > queue_max_hw_sectors(q)) { + bio->bi_size = queue_max_hw_sectors(q) << 9; + nr_sects -= queue_max_hw_sectors(q); + sector += queue_max_hw_sectors(q); } else { bio->bi_size = nr_sects << 9; nr_sects = 0; diff --git a/block/blk-core.c b/block/blk-core.c index 59c4af52311..7a4c40184a6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1437,11 +1437,11 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > q->max_hw_sectors)) { + if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", - bdevname(bio->bi_bdev, b), - bio_sectors(bio), - q->max_hw_sectors); + bdevname(bio->bi_bdev, b), + bio_sectors(bio), + queue_max_hw_sectors(q)); goto end_io; } @@ -1608,8 +1608,8 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (blk_rq_sectors(rq) > q->max_sectors || - blk_rq_bytes(rq) > q->max_hw_sectors << 9) { + if (blk_rq_sectors(rq) > queue_max_sectors(q) || + blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -1621,8 +1621,8 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) * limitation. */ blk_recalc_rq_segments(rq); - if (rq->nr_phys_segments > q->max_phys_segments || - rq->nr_phys_segments > q->max_hw_segments) { + if (rq->nr_phys_segments > queue_max_phys_segments(q) || + rq->nr_phys_segments > queue_max_hw_segments(q)) { printk(KERN_ERR "%s: over max segments limit.\n", __func__); return -EIO; } diff --git a/block/blk-map.c b/block/blk-map.c index ef2492adca7..9083cf0180c 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -115,7 +115,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, struct bio *bio = NULL; int ret; - if (len > (q->max_hw_sectors << 9)) + if (len > (queue_max_hw_sectors(q) << 9)) return -EINVAL; if (!len) return -EINVAL; @@ -292,7 +292,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, struct bio *bio; int ret; - if (len > (q->max_hw_sectors << 9)) + if (len > (queue_max_hw_sectors(q) << 9)) return -EINVAL; if (!len || !kbuf) return -EINVAL; diff --git a/block/blk-merge.c b/block/blk-merge.c index 4974dd5767e..39ce64432ba 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -32,11 +32,12 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, * never considered part of another segment, since that * might change with the bounce page. */ - high = page_to_pfn(bv->bv_page) > q->bounce_pfn; + high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q); if (high || highprv) goto new_segment; if (cluster) { - if (seg_size + bv->bv_len > q->max_segment_size) + if (seg_size + bv->bv_len + > queue_max_segment_size(q)) goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) goto new_segment; @@ -91,7 +92,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, return 0; if (bio->bi_seg_back_size + nxt->bi_seg_front_size > - q->max_segment_size) + queue_max_segment_size(q)) return 0; if (!bio_has_data(bio)) @@ -134,7 +135,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nbytes = bvec->bv_len; if (bvprv && cluster) { - if (sg->length + nbytes > q->max_segment_size) + if (sg->length + nbytes > queue_max_segment_size(q)) goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) @@ -205,8 +206,8 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments - || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { + if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) || + req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -227,9 +228,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, unsigned short max_sectors; if (unlikely(blk_pc_request(req))) - max_sectors = q->max_hw_sectors; + max_sectors = queue_max_hw_sectors(q); else - max_sectors = q->max_sectors; + max_sectors = queue_max_sectors(q); if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { req->cmd_flags |= REQ_NOMERGE; @@ -251,9 +252,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, unsigned short max_sectors; if (unlikely(blk_pc_request(req))) - max_sectors = q->max_hw_sectors; + max_sectors = queue_max_hw_sectors(q); else - max_sectors = q->max_sectors; + max_sectors = queue_max_sectors(q); if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { @@ -287,7 +288,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, /* * Will it become too large? */ - if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors) + if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -299,10 +300,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, total_phys_segments--; } - if (total_phys_segments > q->max_phys_segments) + if (total_phys_segments > queue_max_phys_segments(q)) return 0; - if (total_phys_segments > q->max_hw_segments) + if (total_phys_segments > queue_max_hw_segments(q)) return 0; /* Merge is OK... */ diff --git a/block/blk-settings.c b/block/blk-settings.c index 15c3164537b..0b32f984eed 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -219,6 +219,15 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) } EXPORT_SYMBOL(blk_queue_max_sectors); +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) +{ + if (BLK_DEF_MAX_SECTORS > max_sectors) + q->max_hw_sectors = BLK_DEF_MAX_SECTORS; + else + q->max_hw_sectors = max_sectors; +} +EXPORT_SYMBOL(blk_queue_max_hw_sectors); + /** * blk_queue_max_phys_segments - set max phys segments for a request for this queue * @q: the request queue for the device @@ -395,11 +404,11 @@ int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size) { - if (q->max_hw_segments < 2 || q->max_phys_segments < 2) + if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2) return -EINVAL; /* make room for appending the drain */ - --q->max_hw_segments; - --q->max_phys_segments; + blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1); + blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1); q->dma_drain_needed = dma_drain_needed; q->dma_drain_buffer = buf; q->dma_drain_size = size; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 13d38b7e4d0..142a4acddd4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -95,7 +95,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) { - int max_sectors_kb = q->max_sectors >> 1; + int max_sectors_kb = queue_max_sectors(q) >> 1; return queue_var_show(max_sectors_kb, (page)); } @@ -109,7 +109,7 @@ static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { unsigned long max_sectors_kb, - max_hw_sectors_kb = q->max_hw_sectors >> 1, + max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1, page_kb = 1 << (PAGE_CACHE_SHIFT - 10); ssize_t ret = queue_var_store(&max_sectors_kb, page, count); @@ -117,7 +117,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) return -EINVAL; spin_lock_irq(q->queue_lock); - q->max_sectors = max_sectors_kb << 1; + blk_queue_max_sectors(q, max_sectors_kb << 1); spin_unlock_irq(q->queue_lock); return ret; @@ -125,7 +125,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) { - int max_hw_sectors_kb = q->max_hw_sectors >> 1; + int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1; return queue_var_show(max_hw_sectors_kb, (page)); } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 9eaa1940273..df18a156d01 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -766,7 +766,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: return compat_put_ushort(arg, - bdev_get_queue(bdev)->max_sectors); + queue_max_sectors(bdev_get_queue(bdev))); case BLKRASET: /* compatible, but no compat_ptr (!) */ case BLKFRASET: if (!capable(CAP_SYS_ADMIN)) diff --git a/block/ioctl.c b/block/ioctl.c index 7aa97f65da8..500e4c73cc5 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -152,10 +152,10 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, bio->bi_private = &wait; bio->bi_sector = start; - if (len > q->max_hw_sectors) { - bio->bi_size = q->max_hw_sectors << 9; - len -= q->max_hw_sectors; - start += q->max_hw_sectors; + if (len > queue_max_hw_sectors(q)) { + bio->bi_size = queue_max_hw_sectors(q) << 9; + len -= queue_max_hw_sectors(q); + start += queue_max_hw_sectors(q); } else { bio->bi_size = len << 9; len = 0; @@ -313,7 +313,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKSSZGET: /* get block device hardware sector size */ return put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: - return put_ushort(arg, bdev_get_queue(bdev)->max_sectors); + return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); case BLKRASET: case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a9670dd4b5d..5f8e798ede4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -75,7 +75,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p) static int sg_get_reserved_size(struct request_queue *q, int __user *p) { - unsigned val = min(q->sg_reserved_size, q->max_sectors << 9); + unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9); return put_user(val, p); } @@ -89,8 +89,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p) if (size < 0) return -EINVAL; - if (size > (q->max_sectors << 9)) - size = q->max_sectors << 9; + if (size > (queue_max_sectors(q) << 9)) + size = queue_max_sectors(q) << 9; q->sg_reserved_size = size; return 0; @@ -264,7 +264,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, if (hdr->cmd_len > BLK_MAX_CDB) return -EINVAL; - if (hdr->dxfer_len > (q->max_hw_sectors << 9)) + if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9)) return -EIO; if (hdr->dxfer_len) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 293f5858921..d57f1175948 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -991,13 +991,15 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) */ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q) { - if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) { + if ((pd->settings.size << 9) / CD_FRAMESIZE + <= queue_max_phys_segments(q)) { /* * The cdrom device can handle one segment/frame */ clear_bit(PACKET_MERGE_SEGS, &pd->flags); return 0; - } else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) { + } else if ((pd->settings.size << 9) / PAGE_SIZE + <= queue_max_phys_segments(q)) { /* * We can handle this case at the expense of some extra memory * copies during write operations diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index cceace61ef2..71d1b9bab70 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2101,8 +2101,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, nr = nframes; if (cdi->cdda_method == CDDA_BPC_SINGLE) nr = 1; - if (nr * CD_FRAMESIZE_RAW > (q->max_sectors << 9)) - nr = (q->max_sectors << 9) / CD_FRAMESIZE_RAW; + if (nr * CD_FRAMESIZE_RAW > (queue_max_sectors(q) << 9)) + nr = (queue_max_sectors(q) << 9) / CD_FRAMESIZE_RAW; len = nr * CD_FRAMESIZE_RAW; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 65e2d975985..e9a73bb242b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -510,7 +510,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) * combine_restrictions_low() */ rs->max_sectors = - min_not_zero(rs->max_sectors, q->max_sectors); + min_not_zero(rs->max_sectors, queue_max_sectors(q)); /* * Check if merge fn is supported. @@ -525,25 +525,25 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_phys_segments = min_not_zero(rs->max_phys_segments, - q->max_phys_segments); + queue_max_phys_segments(q)); rs->max_hw_segments = - min_not_zero(rs->max_hw_segments, q->max_hw_segments); + min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q)); rs->logical_block_size = max(rs->logical_block_size, queue_logical_block_size(q)); rs->max_segment_size = - min_not_zero(rs->max_segment_size, q->max_segment_size); + min_not_zero(rs->max_segment_size, queue_max_segment_size(q)); rs->max_hw_sectors = - min_not_zero(rs->max_hw_sectors, q->max_hw_sectors); + min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q)); rs->seg_boundary_mask = min_not_zero(rs->seg_boundary_mask, - q->seg_boundary_mask); + queue_segment_boundary(q)); - rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn); + rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q)); rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); } @@ -914,13 +914,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) * restrictions. */ blk_queue_max_sectors(q, t->limits.max_sectors); - q->max_phys_segments = t->limits.max_phys_segments; - q->max_hw_segments = t->limits.max_hw_segments; - q->logical_block_size = t->limits.logical_block_size; - q->max_segment_size = t->limits.max_segment_size; - q->max_hw_sectors = t->limits.max_hw_sectors; - q->seg_boundary_mask = t->limits.seg_boundary_mask; - q->bounce_pfn = t->limits.bounce_pfn; + blk_queue_max_phys_segments(q, t->limits.max_phys_segments); + blk_queue_max_hw_segments(q, t->limits.max_hw_segments); + blk_queue_logical_block_size(q, t->limits.logical_block_size); + blk_queue_max_segment_size(q, t->limits.max_segment_size); + blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); + blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); + blk_queue_bounce_limit(q, t->limits.bounce_pfn); if (t->limits.no_cluster) queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 7a36e38393a..64f1f3e046e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->num_sectors = rdev->sectors; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 41ced0cbe82..4ee31aa13c4 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -303,7 +303,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * merge_bvec_fn will be involved in multipath.) */ if (q->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(q) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); conf->working_disks++; @@ -467,7 +467,7 @@ static int multipath_run (mddev_t *mddev) * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!test_bit(Faulty, &rdev->flags)) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c08d7559be5..925507e7d67 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev) */ if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!smallest || (rdev1->sectors < smallest->sectors)) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 36df9109cde..e23758b4a34 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1130,7 +1130,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); p->head_position = 0; @@ -1996,7 +1996,7 @@ static int run(mddev_t *mddev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 499620afb44..750550c1166 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1158,8 +1158,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); p->head_position = 0; rdev->raid_disk = mirror; @@ -2145,8 +2145,8 @@ static int run(mddev_t *mddev) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4616bc3a6e7..7970dc8c522 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3463,10 +3463,10 @@ static int bio_fits_rdev(struct bio *bi) { struct request_queue *q = bdev_get_queue(bi->bi_bdev); - if ((bi->bi_size>>9) > q->max_sectors) + if ((bi->bi_size>>9) > queue_max_sectors(q)) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments) + if (bi->bi_phys_segments > queue_max_phys_segments(q)) return 0; if (q->merge_bvec_fn) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0fc2c0ae769..9bd407fa98e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -289,8 +289,8 @@ sg_open(struct inode *inode, struct file *filp) if (list_empty(&sdp->sfds)) { /* no existing opens on this device */ sdp->sgdebug = 0; q = sdp->device->request_queue; - sdp->sg_tablesize = min(q->max_hw_segments, - q->max_phys_segments); + sdp->sg_tablesize = min(queue_max_hw_segments(q), + queue_max_phys_segments(q)); } if ((sfp = sg_add_sfp(sdp, dev))) filp->private_data = sfp; @@ -909,7 +909,7 @@ sg_ioctl(struct inode *inode, struct file *filp, if (val < 0) return -EINVAL; val = min_t(int, val, - sdp->device->request_queue->max_sectors * 512); + queue_max_sectors(sdp->device->request_queue) * 512); if (val != sfp->reserve.bufflen) { if (sg_res_in_use(sfp) || sfp->mmap_called) return -EBUSY; @@ -919,7 +919,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return 0; case SG_GET_RESERVED_SIZE: val = min_t(int, sfp->reserve.bufflen, - sdp->device->request_queue->max_sectors * 512); + queue_max_sectors(sdp->device->request_queue) * 512); return put_user(val, ip); case SG_SET_COMMAND_Q: result = get_user(val, ip); @@ -1059,7 +1059,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return -ENODEV; return scsi_ioctl(sdp->device, cmd_in, p); case BLKSECTGET: - return put_user(sdp->device->request_queue->max_sectors * 512, + return put_user(queue_max_sectors(sdp->device->request_queue) * 512, ip); case BLKTRACESETUP: return blk_trace_setup(sdp->device->request_queue, @@ -1377,7 +1377,8 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) sdp->device = scsidp; INIT_LIST_HEAD(&sdp->sfds); init_waitqueue_head(&sdp->o_excl_wait); - sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments); + sdp->sg_tablesize = min(queue_max_hw_segments(q), + queue_max_phys_segments(q)); sdp->index = k; kref_init(&sdp->d_ref); @@ -2055,7 +2056,7 @@ sg_add_sfp(Sg_device * sdp, int dev) sg_big_buff = def_reserved_size; bufflen = min_t(int, sg_big_buff, - sdp->device->request_queue->max_sectors * 512); + queue_max_sectors(sdp->device->request_queue) * 512); sg_build_reserve(sfp, bufflen); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n", sfp->reserve.bufflen, sfp->reserve.k_use_sg)); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 8681b708344..89bd438e1fe 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3983,8 +3983,8 @@ static int st_probe(struct device *dev) return -ENODEV; } - i = min(SDp->request_queue->max_hw_segments, - SDp->request_queue->max_phys_segments); + i = min(queue_max_hw_segments(SDp->request_queue), + queue_max_phys_segments(SDp->request_queue)); if (st_max_sg_segs < i) i = st_max_sg_segs; buffer = new_tape_buffer((SDp->host)->unchecked_isa_dma, i); diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 4ca3b586064..cfa26d56ce6 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -132,7 +132,7 @@ static int slave_configure(struct scsi_device *sdev) if (us->fflags & US_FL_MAX_SECTORS_MIN) max_sectors = PAGE_CACHE_SIZE >> 9; - if (sdev->request_queue->max_sectors > max_sectors) + if (queue_max_sectors(sdev->request_queue) > max_sectors) blk_queue_max_sectors(sdev->request_queue, max_sectors); } else if (sdev->type == TYPE_TAPE) { @@ -483,7 +483,7 @@ static ssize_t show_max_sectors(struct device *dev, struct device_attribute *att { struct scsi_device *sdev = to_scsi_device(dev); - return sprintf(buf, "%u\n", sdev->request_queue->max_sectors); + return sprintf(buf, "%u\n", queue_max_sectors(sdev->request_queue)); } /* Input routine for the sysfs max_sectors file */ diff --git a/fs/bio.c b/fs/bio.c index 4445c382173..ab423a1024a 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -499,11 +499,11 @@ int bio_get_nr_vecs(struct block_device *bdev) struct request_queue *q = bdev_get_queue(bdev); int nr_pages; - nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages > q->max_phys_segments) - nr_pages = q->max_phys_segments; - if (nr_pages > q->max_hw_segments) - nr_pages = q->max_hw_segments; + nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages > queue_max_phys_segments(q)) + nr_pages = queue_max_phys_segments(q); + if (nr_pages > queue_max_hw_segments(q)) + nr_pages = queue_max_hw_segments(q); return nr_pages; } @@ -562,8 +562,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * make this too complex. */ - while (bio->bi_phys_segments >= q->max_phys_segments - || bio->bi_phys_segments >= q->max_hw_segments) { + while (bio->bi_phys_segments >= queue_max_phys_segments(q) + || bio->bi_phys_segments >= queue_max_hw_segments(q)) { if (retried_segments) return 0; @@ -634,7 +634,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors); + return __bio_add_page(q, bio, page, len, offset, + queue_max_hw_sectors(q)); } /** @@ -654,7 +655,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - return __bio_add_page(q, bio, page, len, offset, q->max_sectors); + return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q)); } struct bio_map_data { diff --git a/include/linux/bio.h b/include/linux/bio.h index d30ec6f30dd..12737be5860 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -279,7 +279,7 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ - __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) + __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) #define BIO_SEG_BOUNDARY(q, b1, b2) \ BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 872b78b7a10..29b48f7b4ba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -898,6 +898,7 @@ extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); +extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); @@ -988,6 +989,41 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) +static inline unsigned long queue_bounce_pfn(struct request_queue *q) +{ + return q->bounce_pfn; +} + +static inline unsigned long queue_segment_boundary(struct request_queue *q) +{ + return q->seg_boundary_mask; +} + +static inline unsigned int queue_max_sectors(struct request_queue *q) +{ + return q->max_sectors; +} + +static inline unsigned int queue_max_hw_sectors(struct request_queue *q) +{ + return q->max_hw_sectors; +} + +static inline unsigned short queue_max_hw_segments(struct request_queue *q) +{ + return q->max_hw_segments; +} + +static inline unsigned short queue_max_phys_segments(struct request_queue *q) +{ + return q->max_phys_segments; +} + +static inline unsigned int queue_max_segment_size(struct request_queue *q) +{ + return q->max_segment_size; +} + static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; diff --git a/mm/bounce.c b/mm/bounce.c index e590272fe7a..8dcd4315e01 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -192,7 +192,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, /* * is destination page below bounce pfn? */ - if (page_to_pfn(page) <= q->bounce_pfn) + if (page_to_pfn(page) <= queue_bounce_pfn(q)) continue; /* @@ -284,7 +284,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (q->bounce_pfn >= blk_max_pfn) + if (queue_bounce_pfn(q) >= blk_max_pfn) return; pool = page_pool; } else { -- cgit v1.2.3-70-g09d2 From 025146e13b63483add912706c101fb0fb6f015cc Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:51 -0400 Subject: block: Move queue limits to an embedded struct To accommodate stacking drivers that do not have an associated request queue we're moving the limits to a separate, embedded structure. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 55 +++++++++++++++++++++++++++++++------------------- include/linux/blkdev.h | 44 +++++++++++++++++++++++----------------- 2 files changed, 60 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 0b32f984eed..b0f547cecfb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -179,16 +179,16 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) */ if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; - q->bounce_pfn = max_low_pfn; + q->limits.bounce_pfn = max_low_pfn; #else if (b_pfn < blk_max_low_pfn) dma = 1; - q->bounce_pfn = b_pfn; + q->limits.bounce_pfn = b_pfn; #endif if (dma) { init_emergency_isa_pool(); q->bounce_gfp = GFP_NOIO | GFP_DMA; - q->bounce_pfn = b_pfn; + q->limits.bounce_pfn = b_pfn; } } EXPORT_SYMBOL(blk_queue_bounce_limit); @@ -211,10 +211,10 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) } if (BLK_DEF_MAX_SECTORS > max_sectors) - q->max_hw_sectors = q->max_sectors = max_sectors; + q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors; else { - q->max_sectors = BLK_DEF_MAX_SECTORS; - q->max_hw_sectors = max_sectors; + q->limits.max_sectors = BLK_DEF_MAX_SECTORS; + q->limits.max_hw_sectors = max_sectors; } } EXPORT_SYMBOL(blk_queue_max_sectors); @@ -222,9 +222,9 @@ EXPORT_SYMBOL(blk_queue_max_sectors); void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) { if (BLK_DEF_MAX_SECTORS > max_sectors) - q->max_hw_sectors = BLK_DEF_MAX_SECTORS; + q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS; else - q->max_hw_sectors = max_sectors; + q->limits.max_hw_sectors = max_sectors; } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -247,7 +247,7 @@ void blk_queue_max_phys_segments(struct request_queue *q, __func__, max_segments); } - q->max_phys_segments = max_segments; + q->limits.max_phys_segments = max_segments; } EXPORT_SYMBOL(blk_queue_max_phys_segments); @@ -271,7 +271,7 @@ void blk_queue_max_hw_segments(struct request_queue *q, __func__, max_segments); } - q->max_hw_segments = max_segments; + q->limits.max_hw_segments = max_segments; } EXPORT_SYMBOL(blk_queue_max_hw_segments); @@ -292,7 +292,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) __func__, max_size); } - q->max_segment_size = max_size; + q->limits.max_segment_size = max_size; } EXPORT_SYMBOL(blk_queue_max_segment_size); @@ -308,7 +308,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); **/ void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) { - q->logical_block_size = size; + q->limits.logical_block_size = size; } EXPORT_SYMBOL(blk_queue_logical_block_size); @@ -325,14 +325,27 @@ EXPORT_SYMBOL(blk_queue_logical_block_size); void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { /* zero is "infinity" */ - t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); - t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); - t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); - - t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments); - t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments); - t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - t->logical_block_size = max(t->logical_block_size, b->logical_block_size); + t->limits.max_sectors = min_not_zero(queue_max_sectors(t), + queue_max_sectors(b)); + + t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t), + queue_max_hw_sectors(b)); + + t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t), + queue_segment_boundary(b)); + + t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t), + queue_max_phys_segments(b)); + + t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t), + queue_max_hw_segments(b)); + + t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t), + queue_max_segment_size(b)); + + t->limits.logical_block_size = max(queue_logical_block_size(t), + queue_logical_block_size(b)); + if (!t->queue_lock) WARN_ON_ONCE(1); else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { @@ -430,7 +443,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) __func__, mask); } - q->seg_boundary_mask = mask; + q->limits.seg_boundary_mask = mask; } EXPORT_SYMBOL(blk_queue_segment_boundary); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 29b48f7b4ba..b7bb6fdba12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,21 @@ struct blk_cmd_filter { struct kobject kobj; }; +struct queue_limits { + unsigned long bounce_pfn; + unsigned long seg_boundary_mask; + + unsigned int max_hw_sectors; + unsigned int max_sectors; + unsigned int max_segment_size; + + unsigned short logical_block_size; + unsigned short max_hw_segments; + unsigned short max_phys_segments; + + unsigned char no_cluster; +}; + struct request_queue { /* @@ -358,7 +373,6 @@ struct request_queue /* * queue needs bounce pages for pages above this limit */ - unsigned long bounce_pfn; gfp_t bounce_gfp; /* @@ -387,14 +401,6 @@ struct request_queue unsigned int nr_congestion_off; unsigned int nr_batching; - unsigned int max_sectors; - unsigned int max_hw_sectors; - unsigned short max_phys_segments; - unsigned short max_hw_segments; - unsigned short logical_block_size; - unsigned int max_segment_size; - - unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; unsigned int dma_pad_mask; @@ -410,6 +416,8 @@ struct request_queue struct timer_list timeout; struct list_head timeout_list; + struct queue_limits limits; + /* * sg stuff */ @@ -991,45 +999,45 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); static inline unsigned long queue_bounce_pfn(struct request_queue *q) { - return q->bounce_pfn; + return q->limits.bounce_pfn; } static inline unsigned long queue_segment_boundary(struct request_queue *q) { - return q->seg_boundary_mask; + return q->limits.seg_boundary_mask; } static inline unsigned int queue_max_sectors(struct request_queue *q) { - return q->max_sectors; + return q->limits.max_sectors; } static inline unsigned int queue_max_hw_sectors(struct request_queue *q) { - return q->max_hw_sectors; + return q->limits.max_hw_sectors; } static inline unsigned short queue_max_hw_segments(struct request_queue *q) { - return q->max_hw_segments; + return q->limits.max_hw_segments; } static inline unsigned short queue_max_phys_segments(struct request_queue *q) { - return q->max_phys_segments; + return q->limits.max_phys_segments; } static inline unsigned int queue_max_segment_size(struct request_queue *q) { - return q->max_segment_size; + return q->limits.max_segment_size; } static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->logical_block_size) - retval = q->logical_block_size; + if (q && q->limits.logical_block_size) + retval = q->limits.logical_block_size; return retval; } -- cgit v1.2.3-70-g09d2 From c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:53 -0400 Subject: block: Export I/O topology for block devices and partitions To support devices with physical block sizes bigger than 512 bytes we need to ensure proper alignment. This patch adds support for exposing I/O topology characteristics as devices are stacked. logical_block_size is the smallest unit the device can address. physical_block_size indicates the smallest I/O the device can write without incurring a read-modify-write penalty. The io_min parameter is the smallest preferred I/O size reported by the device. In many cases this is the same as the physical block size. However, the io_min parameter can be scaled up when stacking (RAID5 chunk size > physical block size). The io_opt characteristic indicates the optimal I/O size reported by the device. This is usually the stripe width for arrays. The alignment_offset parameter indicates the number of bytes the start of the device/partition is offset from the device's natural alignment. Partition tools and MD/DM utilities can use this to pad their offsets so filesystems start on proper boundaries. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 59 +++++++++++ block/blk-settings.c | 186 ++++++++++++++++++++++++++++++++++ block/blk-sysfs.c | 33 ++++++ block/genhd.c | 11 ++ fs/partitions/check.c | 10 ++ include/linux/blkdev.h | 47 +++++++++ include/linux/genhd.h | 1 + 7 files changed, 347 insertions(+) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 44f52a4f590..cbbd3e06994 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -60,3 +60,62 @@ Description: Indicates whether the block layer should automatically generate checksums for write requests bound for devices that support receiving integrity metadata. + +What: /sys/block//alignment_offset +Date: April 2009 +Contact: Martin K. Petersen +Description: + Storage devices may report a physical block size that is + bigger than the logical block size (for instance a drive + with 4KB physical sectors exposing 512-byte logical + blocks to the operating system). This parameter + indicates how many bytes the beginning of the device is + offset from the disk's natural alignment. + +What: /sys/block///alignment_offset +Date: April 2009 +Contact: Martin K. Petersen +Description: + Storage devices may report a physical block size that is + bigger than the logical block size (for instance a drive + with 4KB physical sectors exposing 512-byte logical + blocks to the operating system). This parameter + indicates how many bytes the beginning of the partition + is offset from the disk's natural alignment. + +What: /sys/block//queue/logical_block_size +Date: May 2009 +Contact: Martin K. Petersen +Description: + This is the smallest unit the storage device can + address. It is typically 512 bytes. + +What: /sys/block//queue/physical_block_size +Date: May 2009 +Contact: Martin K. Petersen +Description: + This is the smallest unit the storage device can write + without resorting to read-modify-write operation. It is + usually the same as the logical block size but may be + bigger. One example is SATA drives with 4KB sectors + that expose a 512-byte logical block size to the + operating system. + +What: /sys/block//queue/minimum_io_size +Date: April 2009 +Contact: Martin K. Petersen +Description: + Storage devices may report a preferred minimum I/O size, + which is the smallest request the device can perform + without incurring a read-modify-write penalty. For disk + drives this is often the physical block size. For RAID + arrays it is often the stripe chunk size. + +What: /sys/block//queue/optimal_io_size +Date: April 2009 +Contact: Martin K. Petersen +Description: + Storage devices may report an optimal I/O size, which is + the device's preferred unit of receiving I/O. This is + rarely reported for disk drives. For RAID devices it is + usually the stripe width or the internal block size. diff --git a/block/blk-settings.c b/block/blk-settings.c index b0f547cecfb..5649f34adb4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) { q->limits.logical_block_size = size; + + if (q->limits.physical_block_size < size) + q->limits.physical_block_size = size; + + if (q->limits.io_min < q->limits.physical_block_size) + q->limits.io_min = q->limits.physical_block_size; } EXPORT_SYMBOL(blk_queue_logical_block_size); +/** + * blk_queue_physical_block_size - set physical block size for the queue + * @q: the request queue for the device + * @size: the physical block size, in bytes + * + * Description: + * This should be set to the lowest possible sector size that the + * hardware can operate on without reverting to read-modify-write + * operations. + */ +void blk_queue_physical_block_size(struct request_queue *q, unsigned short size) +{ + q->limits.physical_block_size = size; + + if (q->limits.physical_block_size < q->limits.logical_block_size) + q->limits.physical_block_size = q->limits.logical_block_size; + + if (q->limits.io_min < q->limits.physical_block_size) + q->limits.io_min = q->limits.physical_block_size; +} +EXPORT_SYMBOL(blk_queue_physical_block_size); + +/** + * blk_queue_alignment_offset - set physical block alignment offset + * @q: the request queue for the device + * @alignment: alignment offset in bytes + * + * Description: + * Some devices are naturally misaligned to compensate for things like + * the legacy DOS partition table 63-sector offset. Low-level drivers + * should call this function for devices whose first sector is not + * naturally aligned. + */ +void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) +{ + q->limits.alignment_offset = + offset & (q->limits.physical_block_size - 1); + q->limits.misaligned = 0; +} +EXPORT_SYMBOL(blk_queue_alignment_offset); + +/** + * blk_queue_io_min - set minimum request size for the queue + * @q: the request queue for the device + * @io_min: smallest I/O size in bytes + * + * Description: + * Some devices have an internal block size bigger than the reported + * hardware sector size. This function can be used to signal the + * smallest I/O the device can perform without incurring a performance + * penalty. + */ +void blk_queue_io_min(struct request_queue *q, unsigned int min) +{ + q->limits.io_min = min; + + if (q->limits.io_min < q->limits.logical_block_size) + q->limits.io_min = q->limits.logical_block_size; + + if (q->limits.io_min < q->limits.physical_block_size) + q->limits.io_min = q->limits.physical_block_size; +} +EXPORT_SYMBOL(blk_queue_io_min); + +/** + * blk_queue_io_opt - set optimal request size for the queue + * @q: the request queue for the device + * @io_opt: optimal request size in bytes + * + * Description: + * Drivers can call this function to set the preferred I/O request + * size for devices that report such a value. + */ +void blk_queue_io_opt(struct request_queue *q, unsigned int opt) +{ + q->limits.io_opt = opt; +} +EXPORT_SYMBOL(blk_queue_io_opt); + /* * Returns the minimum that is _not_ zero, unless both are zero. */ @@ -357,6 +442,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); +/** + * blk_stack_limits - adjust queue_limits for stacked devices + * @t: the stacking driver limits (top) + * @bdev: the underlying queue limits (bottom) + * @offset: offset to beginning of data within component device + * + * Description: + * Merges two queue_limit structs. Returns 0 if alignment didn't + * change. Returns -1 if adding the bottom device caused + * misalignment. + */ +int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, + sector_t offset) +{ + t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); + t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + + t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, + b->seg_boundary_mask); + + t->max_phys_segments = min_not_zero(t->max_phys_segments, + b->max_phys_segments); + + t->max_hw_segments = min_not_zero(t->max_hw_segments, + b->max_hw_segments); + + t->max_segment_size = min_not_zero(t->max_segment_size, + b->max_segment_size); + + t->logical_block_size = max(t->logical_block_size, + b->logical_block_size); + + t->physical_block_size = max(t->physical_block_size, + b->physical_block_size); + + t->io_min = max(t->io_min, b->io_min); + t->no_cluster |= b->no_cluster; + + /* Bottom device offset aligned? */ + if (offset && + (offset & (b->physical_block_size - 1)) != b->alignment_offset) { + t->misaligned = 1; + return -1; + } + + /* If top has no alignment offset, inherit from bottom */ + if (!t->alignment_offset) + t->alignment_offset = + b->alignment_offset & (b->physical_block_size - 1); + + /* Top device aligned on logical block boundary? */ + if (t->alignment_offset & (t->logical_block_size - 1)) { + t->misaligned = 1; + return -1; + } + + return 0; +} + +/** + * disk_stack_limits - adjust queue limits for stacked drivers + * @t: MD/DM gendisk (top) + * @bdev: the underlying block device (bottom) + * @offset: offset to beginning of data within component device + * + * Description: + * Merges the limits for two queues. Returns 0 if alignment + * didn't change. Returns -1 if adding the bottom device caused + * misalignment. + */ +void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, + sector_t offset) +{ + struct request_queue *t = disk->queue; + struct request_queue *b = bdev_get_queue(bdev); + + offset += get_start_sect(bdev) << 9; + + if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { + char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; + + disk_name(disk, 0, top); + bdevname(bdev, bottom); + + printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", + top, bottom); + } + + if (!t->queue_lock) + WARN_ON_ONCE(1); + else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { + unsigned long flags; + + spin_lock_irqsave(t->queue_lock, flags); + if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) + queue_flag_clear(QUEUE_FLAG_CLUSTER, t); + spin_unlock_irqrestore(t->queue_lock, flags); + } +} +EXPORT_SYMBOL(disk_stack_limits); + /** * blk_queue_dma_pad - set pad mask * @q: the request queue for the device diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ccdadb8e20..9337e17f911 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -105,6 +105,21 @@ static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page return queue_var_show(queue_logical_block_size(q), page); } +static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_physical_block_size(q), page); +} + +static ssize_t queue_io_min_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_io_min(q), page); +} + +static ssize_t queue_io_opt_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_io_opt(q), page); +} + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -257,6 +272,21 @@ static struct queue_sysfs_entry queue_logical_block_size_entry = { .show = queue_logical_block_size_show, }; +static struct queue_sysfs_entry queue_physical_block_size_entry = { + .attr = {.name = "physical_block_size", .mode = S_IRUGO }, + .show = queue_physical_block_size_show, +}; + +static struct queue_sysfs_entry queue_io_min_entry = { + .attr = {.name = "minimum_io_size", .mode = S_IRUGO }, + .show = queue_io_min_show, +}; + +static struct queue_sysfs_entry queue_io_opt_entry = { + .attr = {.name = "optimal_io_size", .mode = S_IRUGO }, + .show = queue_io_opt_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_nonrot_show, @@ -289,6 +319,9 @@ static struct attribute *default_attrs[] = { &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, &queue_logical_block_size_entry.attr, + &queue_physical_block_size_entry.attr, + &queue_io_min_entry.attr, + &queue_io_opt_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/block/genhd.c b/block/genhd.c index 1a4916e0173..fe7ccc0a618 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev, return sprintf(buf, "%x\n", disk->flags); } +static ssize_t disk_alignment_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); +} + static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); +static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_removable.attr, &dev_attr_ro.attr, &dev_attr_size.attr, + &dev_attr_alignment_offset.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 99e33ef40be..0af36085eb2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -219,6 +219,13 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } +ssize_t part_alignment_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); +} + ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -272,6 +279,7 @@ ssize_t part_fail_store(struct device *dev, static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); +static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = @@ -282,6 +290,7 @@ static struct attribute *part_attrs[] = { &dev_attr_partition.attr, &dev_attr_start.attr, &dev_attr_size.attr, + &dev_attr_alignment_offset.attr, &dev_attr_stat.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, @@ -383,6 +392,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); p->start_sect = start; + p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b7bb6fdba12..5e740a135e7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -314,11 +314,16 @@ struct queue_limits { unsigned int max_hw_sectors; unsigned int max_sectors; unsigned int max_segment_size; + unsigned int physical_block_size; + unsigned int alignment_offset; + unsigned int io_min; + unsigned int io_opt; unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; + unsigned char misaligned; unsigned char no_cluster; }; @@ -911,6 +916,15 @@ extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_alignment_offset(struct request_queue *q, + unsigned int alignment); +extern void blk_queue_io_min(struct request_queue *q, unsigned int min); +extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, + sector_t offset); +extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, + sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -1047,6 +1061,39 @@ static inline unsigned short bdev_logical_block_size(struct block_device *bdev) return queue_logical_block_size(bdev_get_queue(bdev)); } +static inline unsigned int queue_physical_block_size(struct request_queue *q) +{ + return q->limits.physical_block_size; +} + +static inline unsigned int queue_io_min(struct request_queue *q) +{ + return q->limits.io_min; +} + +static inline unsigned int queue_io_opt(struct request_queue *q) +{ + return q->limits.io_opt; +} + +static inline int queue_alignment_offset(struct request_queue *q) +{ + if (q && q->limits.misaligned) + return -1; + + if (q && q->limits.alignment_offset) + return q->limits.alignment_offset; + + return 0; +} + +static inline int queue_sector_alignment_offset(struct request_queue *q, + sector_t sector) +{ + return ((sector << 9) - q->limits.alignment_offset) + & (q->limits.io_min - 1); +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a1a28caed23..149fda264c8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -90,6 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; + sector_t alignment_offset; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3-70-g09d2 From e220d2dcb944c5c488b6855d15ec66d76900514f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:28:55 +0200 Subject: perf_counter: Fix dynamic irq_period logging We call perf_adjust_freq() from perf_counter_task_tick() which is is called under the rq->lock causing lock recursion. However, it's no longer required to be called under the rq->lock, so remove it from under it. Also, fix up some related comments. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.476197912@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 3 ++- kernel/sched.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2eedae8498d..23ddd29730f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -260,6 +260,7 @@ enum perf_event_type { /* * struct { * struct perf_event_header header; + * u64 time; * u64 irq_period; * }; */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c10055416de..2f410ea2cb3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2559,7 +2559,8 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, } /* - * + * Log irq_period changes so that analyzing tools can re-normalize the + * event flow. */ static void perf_log_period(struct perf_counter *counter, u64 period) diff --git a/kernel/sched.c b/kernel/sched.c index 4c0d58bce6b..ad079f07c9c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4875,9 +4875,10 @@ void scheduler_tick(void) update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); - perf_counter_task_tick(curr, cpu); spin_unlock(&rq->lock); + perf_counter_task_tick(curr, cpu); + #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); -- cgit v1.2.3-70-g09d2 From fccc714b3148ab9741fafc1e90c3876d50df6093 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:28:56 +0200 Subject: perf_counter: Sanitize counter->mutex s/counter->mutex/counter->child_mutex/ and make sure its only used to protect child_list. The usage in __perf_counter_exit_task() doesn't appear to be problematic since ctx->mutex also covers anything related to fd tear-down. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.533186528@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 +++---- kernel/perf_counter.c | 47 ++++++++++++++++++-------------------------- 2 files changed, 22 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 23ddd29730f..4ab8050eb9e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -452,9 +452,6 @@ struct perf_counter { struct perf_counter_context *ctx; struct file *filp; - struct perf_counter *parent; - struct list_head child_list; - /* * These accumulate total time (in nanoseconds) that children * counters have been enabled and running, respectively. @@ -465,7 +462,9 @@ struct perf_counter { /* * Protect attach/detach and child_list: */ - struct mutex mutex; + struct mutex child_mutex; + struct list_head child_list; + struct perf_counter *parent; int oncpu; int cpu; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2f410ea2cb3..679c3b5bb7d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -111,6 +111,10 @@ static void put_ctx(struct perf_counter_context *ctx) } } +/* + * Add a counter from the lists for its context. + * Must be called with ctx->mutex and ctx->lock held. + */ static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -136,7 +140,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) /* * Remove a counter from the lists for its context. - * Must be called with counter->mutex and ctx->mutex held. + * Must be called with ctx->mutex and ctx->lock held. */ static void list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) @@ -276,7 +280,7 @@ static void __perf_counter_remove_from_context(void *info) /* * Remove the counter from a task's (or a CPU's) list of counters. * - * Must be called with counter->mutex and ctx->mutex held. + * Must be called with ctx->mutex held. * * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. @@ -1407,11 +1411,7 @@ static int perf_release(struct inode *inode, struct file *file) file->private_data = NULL; mutex_lock(&ctx->mutex); - mutex_lock(&counter->mutex); - perf_counter_remove_from_context(counter); - - mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); free_counter(counter); @@ -1437,7 +1437,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->state == PERF_COUNTER_STATE_ERROR) return 0; - mutex_lock(&counter->mutex); + mutex_lock(&counter->child_mutex); values[0] = perf_counter_read(counter); n = 1; if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -1446,7 +1446,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = counter->total_time_running + atomic64_read(&counter->child_total_time_running); - mutex_unlock(&counter->mutex); + mutex_unlock(&counter->child_mutex); if (count < n * sizeof(u64)) return -EINVAL; @@ -1510,11 +1510,11 @@ static void perf_counter_for_each_child(struct perf_counter *counter, { struct perf_counter *child; - mutex_lock(&counter->mutex); + mutex_lock(&counter->child_mutex); func(counter); list_for_each_entry(child, &counter->child_list, child_list) func(child); - mutex_unlock(&counter->mutex); + mutex_unlock(&counter->child_mutex); } static void perf_counter_for_each(struct perf_counter *counter, @@ -1522,11 +1522,11 @@ static void perf_counter_for_each(struct perf_counter *counter, { struct perf_counter *child; - mutex_lock(&counter->mutex); + mutex_lock(&counter->child_mutex); perf_counter_for_each_sibling(counter, func); list_for_each_entry(child, &counter->child_list, child_list) perf_counter_for_each_sibling(child, func); - mutex_unlock(&counter->mutex); + mutex_unlock(&counter->child_mutex); } static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -3106,7 +3106,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, if (!group_leader) group_leader = counter; - mutex_init(&counter->mutex); + mutex_init(&counter->child_mutex); + INIT_LIST_HEAD(&counter->child_list); + INIT_LIST_HEAD(&counter->list_entry); INIT_LIST_HEAD(&counter->event_entry); INIT_LIST_HEAD(&counter->sibling_list); @@ -3114,8 +3116,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, mutex_init(&counter->mmap_mutex); - INIT_LIST_HEAD(&counter->child_list); - counter->cpu = cpu; counter->hw_event = *hw_event; counter->group_leader = group_leader; @@ -3346,10 +3346,9 @@ inherit_counter(struct perf_counter *parent_counter, /* * Link this into the parent counter's child list */ - mutex_lock(&parent_counter->mutex); + mutex_lock(&parent_counter->child_mutex); list_add_tail(&child_counter->child_list, &parent_counter->child_list); - - mutex_unlock(&parent_counter->mutex); + mutex_unlock(&parent_counter->child_mutex); return child_counter; } @@ -3396,9 +3395,9 @@ static void sync_child_counter(struct perf_counter *child_counter, /* * Remove this counter from the parent's list */ - mutex_lock(&parent_counter->mutex); + mutex_lock(&parent_counter->child_mutex); list_del_init(&child_counter->child_list); - mutex_unlock(&parent_counter->mutex); + mutex_unlock(&parent_counter->child_mutex); /* * Release the parent counter, if this was the last @@ -3414,17 +3413,9 @@ __perf_counter_exit_task(struct task_struct *child, { struct perf_counter *parent_counter; - /* - * Protect against concurrent operations on child_counter - * due its fd getting closed, etc. - */ - mutex_lock(&child_counter->mutex); - update_counter_times(child_counter); list_del_counter(child_counter, child_ctx); - mutex_unlock(&child_counter->mutex); - parent_counter = child_counter->parent; /* * It can happen that parent exits first, and has counters -- cgit v1.2.3-70-g09d2 From 10eb0f013c63c71c82ede77945a5f390c10cfda6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:38 -0500 Subject: [SCSI] iscsi: pass ep connect shost When we create the tcp/ip connection by calling ep_connect, we currently just go by the routing table info. I think there are two problems with this. 1. Some drivers do not have access to a routing table. Some drivers like qla4xxx do not even know about other ports. 2. If you have two initiator ports on the same subnet, the user may have set things up so that session1 was supposed to be run through port1. and session2 was supposed to be run through port2. It looks like we could end with both sessions going through one of the ports. Fixes for cxgb3i from Karen Xie. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 +- drivers/scsi/cxgb3i/cxgb3i.h | 1 - drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 25 +++++++++++++--- drivers/scsi/cxgb3i/cxgb3i_offload.c | 23 ++++++++------ drivers/scsi/cxgb3i/cxgb3i_offload.h | 3 +- drivers/scsi/scsi_transport_iscsi.c | 51 +++++++++++++++++++++++++------- include/scsi/iscsi_if.h | 7 ++++- include/scsi/scsi_transport_iscsi.h | 3 +- 8 files changed, 87 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 75223f50de5..ffbe0c76bc1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -517,7 +517,8 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s } static struct iscsi_endpoint * -iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking) +iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, + int non_blocking) { int err; struct iser_conn *ib_conn; diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h index 59b0958d2d1..e3133b58e59 100644 --- a/drivers/scsi/cxgb3i/cxgb3i.h +++ b/drivers/scsi/cxgb3i/cxgb3i.h @@ -144,7 +144,6 @@ struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *); void cxgb3i_adapter_open(struct t3cdev *); void cxgb3i_adapter_close(struct t3cdev *); -struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *); struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *, struct net_device *); void cxgb3i_hba_host_remove(struct cxgb3i_hba *); diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 9212400b9b1..04a43744aed 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -178,7 +178,7 @@ void cxgb3i_adapter_close(struct t3cdev *t3dev) * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure via net_device * @t3dev: t3cdev adapter */ -struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) +static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) { struct cxgb3i_adapter *snic; int i; @@ -261,20 +261,27 @@ void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba) /** * cxgb3i_ep_connect - establish TCP connection to target portal + * @shost: scsi host to use * @dst_addr: target IP address * @non_blocking: blocking or non-blocking call * * Initiates a TCP/IP connection to the dst_addr */ -static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, +static struct iscsi_endpoint *cxgb3i_ep_connect(struct Scsi_Host *shost, + struct sockaddr *dst_addr, int non_blocking) { struct iscsi_endpoint *ep; struct cxgb3i_endpoint *cep; - struct cxgb3i_hba *hba; + struct cxgb3i_hba *hba = NULL; struct s3_conn *c3cn = NULL; int err = 0; + if (shost) + hba = iscsi_host_priv(shost); + + cxgb3i_api_debug("shost 0x%p, hba 0x%p.\n", shost, hba); + c3cn = cxgb3i_c3cn_create(); if (!c3cn) { cxgb3i_log_info("ep connect OOM.\n"); @@ -282,17 +289,27 @@ static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, goto release_conn; } - err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr); + err = cxgb3i_c3cn_connect(hba ? hba->ndev : NULL, c3cn, + (struct sockaddr_in *)dst_addr); if (err < 0) { cxgb3i_log_info("ep connect failed.\n"); goto release_conn; } + hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev); if (!hba) { err = -ENOSPC; cxgb3i_log_info("NOT going through cxgbi device.\n"); goto release_conn; } + + if (shost && hba != iscsi_host_priv(shost)) { + err = -ENOSPC; + cxgb3i_log_info("Could not connect through request host%u\n", + shost->host_no); + goto release_conn; + } + if (c3cn_is_closing(c3cn)) { err = -ENOSPC; cxgb3i_log_info("ep connect unable to connect.\n"); diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c index e11c9c180f3..c1d5be4adf9 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.c +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c @@ -1479,12 +1479,13 @@ static struct net_device *cxgb3_egress_dev(struct net_device *root_dev, return NULL; } -static struct rtable *find_route(__be32 saddr, __be32 daddr, +static struct rtable *find_route(struct net_device *dev, + __be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { struct rtable *rt; struct flowi fl = { - .oif = 0, + .oif = dev ? dev->ifindex : 0, .nl_u = { .ip4_u = { .daddr = daddr, @@ -1573,36 +1574,40 @@ out_err: * * return 0 if active open request is sent, < 0 otherwise. */ -int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin) +int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn, + struct sockaddr_in *usin) { struct rtable *rt; - struct net_device *dev; struct cxgb3i_sdev_data *cdata; struct t3cdev *cdev; __be32 sipv4; int err; + c3cn_conn_debug("c3cn 0x%p, dev 0x%p.\n", c3cn, dev); + if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; c3cn->daddr.sin_port = usin->sin_port; c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr; - rt = find_route(c3cn->saddr.sin_addr.s_addr, + rt = find_route(dev, c3cn->saddr.sin_addr.s_addr, c3cn->daddr.sin_addr.s_addr, c3cn->saddr.sin_port, c3cn->daddr.sin_port); if (rt == NULL) { - c3cn_conn_debug("NO route to 0x%x, port %u.\n", + c3cn_conn_debug("NO route to 0x%x, port %u, dev %s.\n", c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); + ntohs(c3cn->daddr.sin_port), + dev ? dev->name : "any"); return -ENETUNREACH; } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { - c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", + c3cn_conn_debug("multi-cast route to 0x%x, port %u, dev %s.\n", c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); + ntohs(c3cn->daddr.sin_port), + dev ? dev->name : "any"); ip_rt_put(rt); return -ENETUNREACH; } diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h index ebfca960c0a..6a1d86b1faf 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.h +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h @@ -169,7 +169,8 @@ void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *); void cxgb3i_sdev_remove(struct t3cdev *); struct s3_conn *cxgb3i_c3cn_create(void); -int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *); +int cxgb3i_c3cn_connect(struct net_device *, struct s3_conn *, + struct sockaddr_in *); void cxgb3i_c3cn_rx_credits(struct s3_conn *, int); int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *); void cxgb3i_c3cn_release(struct s3_conn *); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0a2ce7b6325..d69a53aa406 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1268,26 +1268,54 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_if_ep_connect(struct iscsi_transport *transport, + struct iscsi_uevent *ev, int msg_type) +{ + struct iscsi_endpoint *ep; + struct sockaddr *dst_addr; + struct Scsi_Host *shost = NULL; + int non_blocking, err = 0; + + if (!transport->ep_connect) + return -EINVAL; + + if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) { + shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no); + if (!shost) { + printk(KERN_ERR "ep connect failed. Could not find " + "host no %u\n", + ev->u.ep_connect_through_host.host_no); + return -ENODEV; + } + non_blocking = ev->u.ep_connect_through_host.non_blocking; + } else + non_blocking = ev->u.ep_connect.non_blocking; + + dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); + ep = transport->ep_connect(shost, dst_addr, non_blocking); + if (IS_ERR(ep)) { + err = PTR_ERR(ep); + goto release_host; + } + + ev->r.ep_connect_ret.handle = ep->id; +release_host: + if (shost) + scsi_host_put(shost); + return err; +} + static int iscsi_if_transport_ep(struct iscsi_transport *transport, struct iscsi_uevent *ev, int msg_type) { struct iscsi_endpoint *ep; - struct sockaddr *dst_addr; int rc = 0; switch (msg_type) { + case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: - if (!transport->ep_connect) - return -EINVAL; - - dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); - ep = transport->ep_connect(dst_addr, - ev->u.ep_connect.non_blocking); - if (IS_ERR(ep)) - return PTR_ERR(ep); - - ev->r.ep_connect_ret.handle = ep->id; + rc = iscsi_if_ep_connect(transport, ev, msg_type); break; case ISCSI_UEVENT_TRANSPORT_EP_POLL: if (!transport->ep_poll) @@ -1469,6 +1497,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: case ISCSI_UEVENT_TRANSPORT_EP_POLL: case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: + case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type); break; case ISCSI_UEVENT_TGT_DSCVR: diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index d0ed5226f8c..2c1a4af9eaf 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -50,7 +50,8 @@ enum iscsi_uevent_e { ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15, ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16, ISCSI_UEVENT_UNBIND_SESSION = UEVENT_BASE + 17, - ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, + ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, + ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST = UEVENT_BASE + 19, /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, @@ -131,6 +132,10 @@ struct iscsi_uevent { struct msg_transport_connect { uint32_t non_blocking; } ep_connect; + struct msg_transport_connect_through_host { + uint32_t host_no; + uint32_t non_blocking; + } ep_connect_through_host; struct msg_transport_poll { uint64_t ep_handle; uint32_t timeout_ms; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 457588e1119..8cb7a31d996 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -126,7 +126,8 @@ struct iscsi_transport { int *index, int *age); void (*session_recovery_timedout) (struct iscsi_cls_session *session); - struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr, + struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost, + struct sockaddr *dst_addr, int non_blocking); int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms); void (*ep_disconnect) (struct iscsi_endpoint *ep); -- cgit v1.2.3-70-g09d2 From 8f9256cea10ca43ac80f66e176643eb41db34244 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:41 -0500 Subject: [SCSI] libiscsi: export iscsi_itt_to_task for bnx2i bnx2i needs to be able to look up mgmt task like login and nop, because it does some processing of them on the completion path. This exports iscsi_itt_to_task so it can look up the task. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 3 ++- include/scsi/libiscsi.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b4aaf2e5fe7..a6e6eef04fe 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -828,7 +828,7 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr, * * The session lock must be held. */ -static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt) +struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt) { struct iscsi_session *session = conn->session; int i; @@ -845,6 +845,7 @@ static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt) return session->cmds[i]; } +EXPORT_SYMBOL_GPL(iscsi_itt_to_task); /** * __iscsi_complete_pdu - complete pdu diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 0289f5745fb..88d33a46efa 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -406,6 +406,7 @@ extern int __iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *, char *, int); extern int iscsi_verify_itt(struct iscsi_conn *, itt_t); extern struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *, itt_t); +extern struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *, itt_t); extern void iscsi_requeue_task(struct iscsi_task *task); extern void iscsi_put_task(struct iscsi_task *task); extern void __iscsi_get_task(struct iscsi_task *task); -- cgit v1.2.3-70-g09d2 From 3bbaaad95fd38dedb7c66a601f14825b4e0c5a59 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:46 -0500 Subject: [SCSI] libiscsi: handle cleanup task races bnx2i needs to send a hardware specific cleanup command if a command has not completed normally (iscsi/scsi response from target), and the session is still ok (this is the case when we send a TMF to stop the command). At this time it will need to drop the session lock. The problem with the current code is that fail_all_commands assumes we will hold the lock the entire time, so it uses list_for_each_entry_safe. If while bnx2i drops the session lock multiple cmds complete then list_for_each_entry_safe will not handle this correctly. This patch removes the running lists and just has us loop over the cmds array (in later patches we will then replace that array with a block tag map at the session level). It also fixes up the completion path so that if the TMF code and the normal recv path were completing the same command then they both do not try to do release the refcount taken when the task is queued. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 225 +++++++++++++++++++++++++----------------------- include/scsi/libiscsi.h | 5 +- 2 files changed, 118 insertions(+), 112 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index c648bd328a2..a9d7e520e55 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -109,7 +109,7 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) * if the window closed with IO queued, then kick the * xmit thread */ - if (!list_empty(&session->leadconn->xmitqueue) || + if (!list_empty(&session->leadconn->cmdqueue) || !list_empty(&session->leadconn->mgmtqueue)) { if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD)) iscsi_conn_queue_work(session->leadconn); @@ -366,7 +366,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) return -EIO; task->state = ISCSI_TASK_RUNNING; - list_move_tail(&task->running, &conn->run_list); conn->scsicmd_pdus_cnt++; ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x " @@ -382,26 +381,23 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) } /** - * iscsi_complete_command - finish a task + * iscsi_free_task - free a task * @task: iscsi cmd task * * Must be called with session lock. * This function returns the scsi command to scsi-ml or cleans * up mgmt tasks then returns the task to the pool. */ -static void iscsi_complete_command(struct iscsi_task *task) +static void iscsi_free_task(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; session->tt->cleanup_task(task); - list_del_init(&task->running); - task->state = ISCSI_TASK_COMPLETED; + task->state = ISCSI_TASK_FREE; task->sc = NULL; - if (conn->task == task) - conn->task = NULL; /* * login task is preallocated so do not free */ @@ -410,9 +406,6 @@ static void iscsi_complete_command(struct iscsi_task *task) __kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*)); - if (conn->ping_task == task) - conn->ping_task = NULL; - if (sc) { task->sc = NULL; /* SCSI eh reuses commands to verify us */ @@ -435,7 +428,7 @@ EXPORT_SYMBOL_GPL(__iscsi_get_task); static void __iscsi_put_task(struct iscsi_task *task) { if (atomic_dec_and_test(&task->refcount)) - iscsi_complete_command(task); + iscsi_free_task(task); } void iscsi_put_task(struct iscsi_task *task) @@ -448,14 +441,50 @@ void iscsi_put_task(struct iscsi_task *task) } EXPORT_SYMBOL_GPL(iscsi_put_task); +/** + * iscsi_complete_task - finish a task + * @task: iscsi cmd task + * + * Must be called with session lock. + */ +static void iscsi_complete_task(struct iscsi_task *task) +{ + struct iscsi_conn *conn = task->conn; + + if (task->state == ISCSI_TASK_COMPLETED) + return; + WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); + + task->state = ISCSI_TASK_COMPLETED; + + if (!list_empty(&task->running)) + list_del_init(&task->running); + + if (conn->task == task) + conn->task = NULL; + + if (conn->ping_task == task) + conn->ping_task = NULL; + + /* release get from queueing */ + __iscsi_put_task(task); +} + /* - * session lock must be held + * session lock must be held and if not called for a task that is + * still pending or from the xmit thread, then xmit thread must + * be suspended. */ -static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task, - int err) +static void fail_scsi_task(struct iscsi_task *task, int err) { + struct iscsi_conn *conn = task->conn; struct scsi_cmnd *sc; + /* + * if a command completes and we get a successful tmf response + * we will hit this because the scsi eh abort code does not take + * a ref to the task. + */ sc = task->sc; if (!sc) return; @@ -475,10 +504,7 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task, scsi_in(sc)->resid = scsi_in(sc)->length; } - if (conn->task == task) - conn->task = NULL; - /* release ref from queuecommand */ - __iscsi_put_task(task); + iscsi_complete_task(task); } static int iscsi_prep_mgmt_task(struct iscsi_conn *conn, @@ -518,7 +544,6 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn, session->state = ISCSI_STATE_LOGGING_OUT; task->state = ISCSI_TASK_RUNNING; - list_move_tail(&task->running, &conn->mgmt_run_list); ISCSI_DBG_SESSION(session, "mgmtpdu [op 0x%x hdr->itt 0x%x " "datalen %d]\n", hdr->opcode & ISCSI_OPCODE_MASK, hdr->itt, task->data_count); @@ -564,6 +589,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, atomic_set(&task->refcount, 1); task->conn = conn; task->sc = NULL; + INIT_LIST_HEAD(&task->running); + task->state = ISCSI_TASK_PENDING; if (data_size) { memcpy(task->data, data, data_size); @@ -575,7 +602,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (conn->session->tt->alloc_pdu(task, hdr->opcode)) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate " "pdu for mgmt task.\n"); - goto requeue_task; + goto free_task; } } @@ -591,30 +618,22 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, task->conn->session->age); } - INIT_LIST_HEAD(&task->running); - list_add_tail(&task->running, &conn->mgmtqueue); - if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) { if (iscsi_prep_mgmt_task(conn, task)) goto free_task; if (session->tt->xmit_task(task)) goto free_task; - - } else + } else { + list_add_tail(&task->running, &conn->mgmtqueue); iscsi_conn_queue_work(conn); + } return task; free_task: __iscsi_put_task(task); return NULL; - -requeue_task: - if (task != conn->login_task) - __kfifo_put(session->cmdpool.queue, (void*)&task, - sizeof(void*)); - return NULL; } int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr, @@ -709,11 +728,10 @@ invalid_datalen: sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; } out: - ISCSI_DBG_SESSION(session, "done [sc %p res %d itt 0x%x]\n", + ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n", sc, sc->result, task->itt); conn->scsirsp_pdus_cnt++; - - __iscsi_put_task(task); + iscsi_complete_task(task); } /** @@ -747,8 +765,11 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; } + ISCSI_DBG_SESSION(conn->session, "data in with status done " + "[sc %p res %d itt 0x%x]\n", + sc, sc->result, task->itt); conn->scsirsp_pdus_cnt++; - __iscsi_put_task(task); + iscsi_complete_task(task); } static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) @@ -969,7 +990,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, } iscsi_tmf_rsp(conn, hdr); - __iscsi_put_task(task); + iscsi_complete_task(task); break; case ISCSI_OP_NOOP_IN: iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); @@ -987,7 +1008,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, goto recv_pdu; mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout); - __iscsi_put_task(task); + iscsi_complete_task(task); break; default: rc = ISCSI_ERR_BAD_OPCODE; @@ -999,7 +1020,7 @@ out: recv_pdu: if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen)) rc = ISCSI_ERR_CONN_FAILED; - __iscsi_put_task(task); + iscsi_complete_task(task); return rc; } EXPORT_SYMBOL_GPL(__iscsi_complete_pdu); @@ -1176,7 +1197,12 @@ void iscsi_requeue_task(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; - list_move_tail(&task->running, &conn->requeue); + /* + * this may be on the requeue list already if the xmit_task callout + * is handling the r2ts while we are adding new ones + */ + if (list_empty(&task->running)) + list_add_tail(&task->running, &conn->requeue); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1216,6 +1242,7 @@ check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); + list_del_init(&conn->task->running); if (iscsi_prep_mgmt_task(conn, conn->task)) { __iscsi_put_task(conn->task); conn->task = NULL; @@ -1227,23 +1254,26 @@ check_mgmt: } /* process pending command queue */ - while (!list_empty(&conn->xmitqueue)) { + while (!list_empty(&conn->cmdqueue)) { if (conn->tmf_state == TMF_QUEUED) break; - conn->task = list_entry(conn->xmitqueue.next, + conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); + list_del_init(&conn->task->running); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { - fail_command(conn, conn->task, DID_IMM_RETRY << 16); + fail_scsi_task(conn->task, DID_IMM_RETRY << 16); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM) { + list_add_tail(&conn->task->running, + &conn->cmdqueue); conn->task = NULL; goto again; } else - fail_command(conn, conn->task, DID_ABORT << 16); + fail_scsi_task(conn->task, DID_ABORT << 16); continue; } rc = iscsi_xmit_task(conn); @@ -1270,8 +1300,8 @@ check_mgmt: conn->task = list_entry(conn->requeue.next, struct iscsi_task, running); + list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; - list_move_tail(conn->requeue.next, &conn->run_list); rc = iscsi_xmit_task(conn); if (rc) goto again; @@ -1412,7 +1442,6 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) reason = FAILURE_OOM; goto reject; } - list_add_tail(&task->running, &conn->xmitqueue); if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) { reason = iscsi_prep_scsi_cmd_pdu(task); @@ -1429,8 +1458,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) reason = FAILURE_SESSION_NOT_READY; goto prepd_reject; } - } else + } else { + list_add_tail(&task->running, &conn->cmdqueue); iscsi_conn_queue_work(conn); + } session->queued_cmdsn++; spin_unlock(&session->lock); @@ -1439,7 +1470,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) prepd_reject: sc->scsi_done = NULL; - iscsi_complete_command(task); + iscsi_complete_task(task); reject: spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n", @@ -1449,7 +1480,7 @@ reject: prepd_fault: sc->scsi_done = NULL; - iscsi_complete_command(task); + iscsi_complete_task(task); fault: spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n", @@ -1618,44 +1649,24 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn, * Fail commands. session lock held and recv side suspended and xmit * thread flushed */ -static void fail_all_commands(struct iscsi_conn *conn, unsigned lun, - int error) +static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun, + int error) { - struct iscsi_task *task, *tmp; - - if (conn->task) { - if (lun == -1 || - (conn->task->sc && conn->task->sc->device->lun == lun)) - conn->task = NULL; - } + struct iscsi_task *task; + int i; - /* flush pending */ - list_for_each_entry_safe(task, tmp, &conn->xmitqueue, running) { - if (lun == task->sc->device->lun || lun == -1) { - ISCSI_DBG_SESSION(conn->session, - "failing pending sc %p itt 0x%x\n", - task->sc, task->itt); - fail_command(conn, task, error << 16); - } - } + for (i = 0; i < conn->session->cmds_max; i++) { + task = conn->session->cmds[i]; + if (!task->sc || task->state == ISCSI_TASK_FREE) + continue; - list_for_each_entry_safe(task, tmp, &conn->requeue, running) { - if (lun == task->sc->device->lun || lun == -1) { - ISCSI_DBG_SESSION(conn->session, - "failing requeued sc %p itt 0x%x\n", - task->sc, task->itt); - fail_command(conn, task, error << 16); - } - } + if (lun != -1 && lun != task->sc->device->lun) + continue; - /* fail all other running */ - list_for_each_entry_safe(task, tmp, &conn->run_list, running) { - if (lun == task->sc->device->lun || lun == -1) { - ISCSI_DBG_SESSION(conn->session, - "failing in progress sc %p itt 0x%x\n", - task->sc, task->itt); - fail_command(conn, task, error << 16); - } + ISCSI_DBG_SESSION(conn->session, + "failing sc %p itt 0x%x state %d\n", + task->sc, task->itt, task->state); + fail_scsi_task(task, error << 16); } } @@ -1859,7 +1870,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) } if (task->state == ISCSI_TASK_PENDING) { - fail_command(conn, task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT << 16); goto success; } @@ -1890,7 +1901,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) * then sent more data for the cmd. */ spin_lock(&session->lock); - fail_command(conn, task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT << 16); conn->tmf_state = TMF_INITIAL; spin_unlock(&session->lock); iscsi_start_tx(conn); @@ -1997,7 +2008,7 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc) iscsi_suspend_tx(conn); spin_lock_bh(&session->lock); - fail_all_commands(conn, sc->device->lun, DID_ERROR); + fail_scsi_tasks(conn, sc->device->lun, DID_ERROR); conn->tmf_state = TMF_INITIAL; spin_unlock_bh(&session->lock); @@ -2304,6 +2315,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, if (cmd_task_size) task->dd_data = &task[1]; task->itt = cmd_i; + task->state = ISCSI_TASK_FREE; INIT_LIST_HEAD(&task->running); } @@ -2390,10 +2402,8 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, conn->transport_timer.data = (unsigned long)conn; conn->transport_timer.function = iscsi_check_transport_timeouts; - INIT_LIST_HEAD(&conn->run_list); - INIT_LIST_HEAD(&conn->mgmt_run_list); INIT_LIST_HEAD(&conn->mgmtqueue); - INIT_LIST_HEAD(&conn->xmitqueue); + INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); @@ -2561,27 +2571,24 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn) EXPORT_SYMBOL_GPL(iscsi_conn_start); static void -flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn) +fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) { - struct iscsi_task *task, *tmp; + struct iscsi_task *task; + int i; - /* handle pending */ - list_for_each_entry_safe(task, tmp, &conn->mgmtqueue, running) { - ISCSI_DBG_SESSION(session, "flushing pending mgmt task " - "itt 0x%x\n", task->itt); - /* release ref from prep task */ - __iscsi_put_task(task); - } + for (i = 0; i < conn->session->cmds_max; i++) { + task = conn->session->cmds[i]; + if (task->sc) + continue; - /* handle running */ - list_for_each_entry_safe(task, tmp, &conn->mgmt_run_list, running) { - ISCSI_DBG_SESSION(session, "flushing running mgmt task " - "itt 0x%x\n", task->itt); - /* release ref from prep task */ - __iscsi_put_task(task); - } + if (task->state == ISCSI_TASK_FREE) + continue; - conn->task = NULL; + ISCSI_DBG_SESSION(conn->session, + "failing mgmt itt 0x%x state %d\n", + task->itt, task->state); + iscsi_complete_task(task); + } } static void iscsi_start_session_recovery(struct iscsi_session *session, @@ -2638,10 +2645,10 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, */ spin_lock_bh(&session->lock); if (flag == STOP_CONN_RECOVER) - fail_all_commands(conn, -1, DID_TRANSPORT_DISRUPTED); + fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); else - fail_all_commands(conn, -1, DID_ERROR); - flush_control_queues(session, conn); + fail_scsi_tasks(conn, -1, DID_ERROR); + fail_mgmt_tasks(session, conn); spin_unlock_bh(&session->lock); mutex_unlock(&session->eh_mutex); } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 88d33a46efa..facae71183a 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -82,6 +82,7 @@ enum { enum { + ISCSI_TASK_FREE, ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, @@ -181,9 +182,7 @@ struct iscsi_conn { /* xmit */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ - struct list_head mgmt_run_list; /* list of control tasks */ - struct list_head xmitqueue; /* data-path cmd queue */ - struct list_head run_list; /* list of cmds in progress */ + struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ struct work_struct xmitwork; /* per-conn. xmit workqueue */ unsigned long suspend_tx; /* suspend Tx */ -- cgit v1.2.3-70-g09d2 From b3cd5050bf8eb32ceecee129cac7c59e6f1668c4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:49 -0500 Subject: [SCSI] libiscsi: add task aborted state If a task did not complete normally due to a TMF, libiscsi will now complete the task with the state ISCSI_TASK_ABRT_TMF. Drivers like bnx2i that need to free resources if a command did not complete normally can then check the task state. If a driver does not need to send a special command if we have dropped the session then they can check for ISCSI_TASK_ABRT_SESS_RECOV. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 ++-- drivers/scsi/libiscsi.c | 60 +++++++++++++++++++------------- drivers/scsi/libiscsi_tcp.c | 4 +-- include/scsi/libiscsi.h | 2 ++ 4 files changed, 41 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index ffbe0c76bc1..0ba6ec87629 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -257,11 +257,8 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - /* - * mgmt tasks do not need special cleanup and we do not - * allocate anything in the init task callout - */ - if (!task->sc || task->state == ISCSI_TASK_PENDING) + /* mgmt tasks do not need special cleanup */ + if (!task->sc) return; if (iser_task->status == ISER_TASK_STATUS_STARTED) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index dafa054537f..b00be6c3efc 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -443,18 +443,20 @@ EXPORT_SYMBOL_GPL(iscsi_put_task); /** * iscsi_complete_task - finish a task * @task: iscsi cmd task + * @state: state to complete task with * * Must be called with session lock. */ -static void iscsi_complete_task(struct iscsi_task *task) +static void iscsi_complete_task(struct iscsi_task *task, int state) { struct iscsi_conn *conn = task->conn; - if (task->state == ISCSI_TASK_COMPLETED) + if (task->state == ISCSI_TASK_COMPLETED || + task->state == ISCSI_TASK_ABRT_TMF || + task->state == ISCSI_TASK_ABRT_SESS_RECOV) return; WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); - - task->state = ISCSI_TASK_COMPLETED; + task->state = state; if (!list_empty(&task->running)) list_del_init(&task->running); @@ -478,6 +480,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err) { struct iscsi_conn *conn = task->conn; struct scsi_cmnd *sc; + int state; /* * if a command completes and we get a successful tmf response @@ -488,14 +491,20 @@ static void fail_scsi_task(struct iscsi_task *task, int err) if (!sc) return; - if (task->state == ISCSI_TASK_PENDING) + if (task->state == ISCSI_TASK_PENDING) { /* * cmd never made it to the xmit thread, so we should not count * the cmd in the sequencing */ conn->session->queued_cmdsn--; + /* it was never sent so just complete like normal */ + state = ISCSI_TASK_COMPLETED; + } else if (err == DID_TRANSPORT_DISRUPTED) + state = ISCSI_TASK_ABRT_SESS_RECOV; + else + state = ISCSI_TASK_ABRT_TMF; - sc->result = err; + sc->result = err << 16; if (!scsi_bidi_cmnd(sc)) scsi_set_resid(sc, scsi_bufflen(sc)); else { @@ -503,7 +512,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err) scsi_in(sc)->resid = scsi_in(sc)->length; } - iscsi_complete_task(task); + iscsi_complete_task(task, state); } static int iscsi_prep_mgmt_task(struct iscsi_conn *conn, @@ -731,7 +740,7 @@ out: ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n", sc, sc->result, task->itt); conn->scsirsp_pdus_cnt++; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); } /** @@ -769,7 +778,7 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, "[sc %p res %d itt 0x%x]\n", sc, sc->result, task->itt); conn->scsirsp_pdus_cnt++; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); } static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) @@ -990,7 +999,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, } iscsi_tmf_rsp(conn, hdr); - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); break; case ISCSI_OP_NOOP_IN: iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); @@ -1008,7 +1017,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, goto recv_pdu; mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout); - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); break; default: rc = ISCSI_ERR_BAD_OPCODE; @@ -1020,7 +1029,7 @@ out: recv_pdu: if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen)) rc = ISCSI_ERR_CONN_FAILED; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); return rc; } EXPORT_SYMBOL_GPL(__iscsi_complete_pdu); @@ -1262,7 +1271,7 @@ check_mgmt: struct iscsi_task, running); list_del_init(&conn->task->running); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { - fail_scsi_task(conn->task, DID_IMM_RETRY << 16); + fail_scsi_task(conn->task, DID_IMM_RETRY); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); @@ -1273,7 +1282,7 @@ check_mgmt: conn->task = NULL; goto again; } else - fail_scsi_task(conn->task, DID_ABORT << 16); + fail_scsi_task(conn->task, DID_ABORT); continue; } rc = iscsi_xmit_task(conn); @@ -1469,7 +1478,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) prepd_reject: sc->scsi_done = NULL; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); reject: spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n", @@ -1479,7 +1488,7 @@ reject: prepd_fault: sc->scsi_done = NULL; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); fault: spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n", @@ -1665,7 +1674,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun, ISCSI_DBG_SESSION(conn->session, "failing sc %p itt 0x%x state %d\n", task->sc, task->itt, task->state); - fail_scsi_task(task, error << 16); + fail_scsi_task(task, error); } } @@ -1868,7 +1877,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) } if (task->state == ISCSI_TASK_PENDING) { - fail_scsi_task(task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT); goto success; } @@ -1899,7 +1908,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) * then sent more data for the cmd. */ spin_lock(&session->lock); - fail_scsi_task(task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT); conn->tmf_state = TMF_INITIAL; spin_unlock(&session->lock); iscsi_start_tx(conn); @@ -2572,7 +2581,7 @@ static void fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) { struct iscsi_task *task; - int i; + int i, state; for (i = 0; i < conn->session->cmds_max; i++) { task = conn->session->cmds[i]; @@ -2585,7 +2594,11 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) ISCSI_DBG_SESSION(conn->session, "failing mgmt itt 0x%x state %d\n", task->itt, task->state); - iscsi_complete_task(task); + state = ISCSI_TASK_ABRT_SESS_RECOV; + if (task->state == ISCSI_TASK_PENDING) + state = ISCSI_TASK_COMPLETED; + iscsi_complete_task(task, state); + } } @@ -2642,10 +2655,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, * flush queues. */ spin_lock_bh(&session->lock); - if (flag == STOP_CONN_RECOVER) - fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); - else - fail_scsi_tasks(conn, -1, DID_ERROR); + fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); fail_mgmt_tasks(session, conn); spin_unlock_bh(&session->lock); mutex_unlock(&session->eh_mutex); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index b84a1d853f2..2bc07090321 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -440,8 +440,8 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) struct iscsi_tcp_task *tcp_task = task->dd_data; struct iscsi_r2t_info *r2t; - /* nothing to do for mgmt or pending tasks */ - if (!task->sc || task->state == ISCSI_TASK_PENDING) + /* nothing to do for mgmt */ + if (!task->sc) return; /* flush task's r2t queues */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index facae71183a..196525cd402 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -86,6 +86,8 @@ enum { ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, + ISCSI_TASK_ABRT_TMF, /* aborted due to TMF */ + ISCSI_TASK_ABRT_SESS_RECOV, /* aborted due to session recovery */ }; struct iscsi_r2t_info { -- cgit v1.2.3-70-g09d2 From a366695592ebc9151dd5a248681270f0925d8324 Mon Sep 17 00:00:00 2001 From: Abhijeet Joglekar Date: Fri, 1 May 2009 10:01:26 -0700 Subject: [SCSI] libfc,fcoe,fnic: Separate rport and lport max retry counts This allows fnic to configure number of retries for lport and rport separately. Signed-off-by: Abhijeet Joglekar Acked-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 1 + drivers/scsi/fnic/fnic_main.c | 1 + drivers/scsi/libfc/fc_rport.c | 2 +- include/scsi/libfc.h | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index d08121f246c..6acb7778f55 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -146,6 +146,7 @@ static int fcoe_lport_config(struct fc_lport *lp) lp->link_up = 0; lp->qfull = 0; lp->max_retry_count = 3; + lp->max_rport_retry_count = 3; lp->e_d_tov = 2 * 1000; /* FC-FS default */ lp->r_a_tov = 2 * 2 * 1000; lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 32ef6b87d89..a84072865fc 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -680,6 +680,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev, } lp->max_retry_count = fnic->config.flogi_retries; + lp->max_rport_retry_count = fnic->config.plogi_retries; lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | FCP_SPPF_CONF_COMPL); if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 3f5094ebc39..7bfbff7e0ef 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -478,7 +478,7 @@ static void fc_rport_error_retry(struct fc_rport *rport, struct fc_frame *fp) if (PTR_ERR(fp) == -FC_EX_CLOSED) return fc_rport_error(rport, fp); - if (rdata->retries < rdata->local_port->max_retry_count) { + if (rdata->retries < rdata->local_port->max_rport_retry_count) { FC_DEBUG_RPORT("Error %ld in state %s, retrying\n", PTR_ERR(fp), fc_rport_state(rport)); rdata->retries++; diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 45f9cc642c4..ebdd9f4cf07 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -679,6 +679,7 @@ struct fc_lport { unsigned int e_d_tov; unsigned int r_a_tov; u8 max_retry_count; + u8 max_rport_retry_count; u16 link_speed; u16 link_supported_speeds; u16 lro_xid; /* max xid for fcoe lro */ -- cgit v1.2.3-70-g09d2 From 082ff5a2767a0679ee543f14883adbafb631ffbe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:29:00 +0200 Subject: perf_counter: Change pctrl() behaviour Instead of en/dis-abling all counters acting on a particular task, en/dis- able all counters we created. [ v2: fix crash on first counter enable ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163012.916937244@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 10 +++++ include/linux/perf_counter.h | 3 ++ include/linux/sched.h | 2 + kernel/perf_counter.c | 87 ++++++++++++-------------------------------- 4 files changed, 39 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641..353c0ac7723 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,6 +108,15 @@ extern struct group_info init_groups; extern struct cred init_cred; +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ + .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -171,6 +180,7 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ + INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4ab8050eb9e..4159ee5940f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -469,6 +469,9 @@ struct perf_counter { int oncpu; int cpu; + struct list_head owner_entry; + struct task_struct *owner; + /* mmap bits */ struct mutex mmap_mutex; atomic_t mmap_count; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9714d450f41..bc9326dcdde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1389,6 +1389,8 @@ struct task_struct { #endif #ifdef CONFIG_PERF_COUNTERS struct perf_counter_context *perf_counter_ctxp; + struct mutex perf_counter_mutex; + struct list_head perf_counter_list; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0e97f896133..4c86a636976 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1076,79 +1076,26 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) __perf_counter_sched_in(ctx, cpuctx, cpu); } -int perf_counter_task_disable(void) +int perf_counter_task_enable(void) { - struct task_struct *curr = current; - struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; - unsigned long flags; - - if (!ctx || !ctx->nr_counters) - return 0; - - local_irq_save(flags); - __perf_counter_task_sched_out(ctx); - - spin_lock(&ctx->lock); - - /* - * Disable all the counters: - */ - perf_disable(); - - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ERROR) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_OFF; - } - } - - perf_enable(); - - spin_unlock_irqrestore(&ctx->lock, flags); + mutex_lock(¤t->perf_counter_mutex); + list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) + perf_counter_enable(counter); + mutex_unlock(¤t->perf_counter_mutex); return 0; } -int perf_counter_task_enable(void) +int perf_counter_task_disable(void) { - struct task_struct *curr = current; - struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; - unsigned long flags; - int cpu; - - if (!ctx || !ctx->nr_counters) - return 0; - - local_irq_save(flags); - cpu = smp_processor_id(); - - __perf_counter_task_sched_out(ctx); - - spin_lock(&ctx->lock); - /* - * Disable all the counters: - */ - perf_disable(); - - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state > PERF_COUNTER_STATE_OFF) - continue; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; - counter->hw_event.disabled = 0; - } - perf_enable(); - - spin_unlock(&ctx->lock); - - perf_counter_task_sched_in(curr, cpu); - - local_irq_restore(flags); + mutex_lock(¤t->perf_counter_mutex); + list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) + perf_counter_disable(counter); + mutex_unlock(¤t->perf_counter_mutex); return 0; } @@ -1416,6 +1363,11 @@ static int perf_release(struct inode *inode, struct file *file) perf_counter_remove_from_context(counter); mutex_unlock(&ctx->mutex); + mutex_lock(&counter->owner->perf_counter_mutex); + list_del_init(&counter->owner_entry); + mutex_unlock(&counter->owner->perf_counter_mutex); + put_task_struct(counter->owner); + free_counter(counter); put_context(ctx); @@ -3272,6 +3224,12 @@ SYSCALL_DEFINE5(perf_counter_open, perf_install_in_context(ctx, counter, cpu); mutex_unlock(&ctx->mutex); + counter->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_counter_mutex); + list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); + mutex_unlock(¤t->perf_counter_mutex); + fput_light(counter_file, fput_needed2); out_fput: @@ -3488,6 +3446,9 @@ void perf_counter_init_task(struct task_struct *child) child->perf_counter_ctxp = NULL; + mutex_init(&child->perf_counter_mutex); + INIT_LIST_HEAD(&child->perf_counter_list); + /* * This is executed from the parent task context, so inherit * counters that have been marked for cloning. -- cgit v1.2.3-70-g09d2 From 475c55797323b67435083f6e2eb8ee670f6410ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 23 May 2009 18:29:01 +0200 Subject: perf_counter: Remove perf_counter_context::nr_enabled now that pctrl() no longer disables other people's counters, remove the PMU cache code that deals with that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090523163013.032998331@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - kernel/perf_counter.c | 11 +---------- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4159ee5940f..2ddf5e3c551 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -516,7 +516,6 @@ struct perf_counter_context { struct list_head event_list; int nr_counters; int nr_active; - int nr_enabled; int is_active; atomic_t refcount; struct task_struct *task; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4c86a636976..cb4062559b4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -134,8 +134,6 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_rcu(&counter->event_entry, &ctx->event_list); ctx->nr_counters++; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - ctx->nr_enabled++; } /* @@ -150,8 +148,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) if (list_empty(&counter->list_entry)) return; ctx->nr_counters--; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - ctx->nr_enabled--; list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); @@ -406,7 +402,6 @@ static void __perf_counter_disable(void *info) else counter_sched_out(counter, cpuctx, ctx); counter->state = PERF_COUNTER_STATE_OFF; - ctx->nr_enabled--; } spin_unlock_irqrestore(&ctx->lock, flags); @@ -448,7 +443,6 @@ static void perf_counter_disable(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_INACTIVE) { update_counter_times(counter); counter->state = PERF_COUNTER_STATE_OFF; - ctx->nr_enabled--; } spin_unlock_irq(&ctx->lock); @@ -759,7 +753,6 @@ static void __perf_counter_enable(void *info) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_enabled = ctx->time - counter->total_time_enabled; - ctx->nr_enabled++; /* * If the counter is in a group and isn't the group leader, @@ -850,7 +843,6 @@ static void perf_counter_enable(struct perf_counter *counter) counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_enabled = ctx->time - counter->total_time_enabled; - ctx->nr_enabled++; } out: spin_unlock_irq(&ctx->lock); @@ -910,8 +902,7 @@ static int context_equiv(struct perf_counter_context *ctx1, struct perf_counter_context *ctx2) { return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx - && ctx1->parent_gen == ctx2->parent_gen - && ctx1->nr_enabled == ctx2->nr_enabled; + && ctx1->parent_gen == ctx2->parent_gen; } /* -- cgit v1.2.3-70-g09d2 From 7df3bb8f59ca8e346bb834006c257cc367c6250a Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Fri, 22 May 2009 11:04:44 +0000 Subject: mISDN: Add watchdog functionality to hfcmulti driver This patch was made by Titus Moldovan and provides IOCTL functions for enabling and disabling the controller's built in watchdog. The use is optional. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfc_multi.h | 1 + drivers/isdn/hardware/mISDN/hfcmulti.c | 32 +++++++++++++++++++++++++++++++- include/linux/mISDNif.h | 3 ++- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h index 663b77f578b..5765e196291 100644 --- a/drivers/isdn/hardware/mISDN/hfc_multi.h +++ b/drivers/isdn/hardware/mISDN/hfc_multi.h @@ -62,6 +62,7 @@ struct hfcm_hw { u_char r_sci_msk; u_char r_tx0, r_tx1; u_char a_st_ctrl0[8]; + u_char r_bert_wd_md; timer_t timer; }; diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 0b28141e43b..ca153de6954 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4013,11 +4013,41 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch, static int channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq) { + struct hfc_multi *hc = dch->hw; int ret = 0; + int wd_mode, wd_cnt; switch (cq->op) { case MISDN_CTRL_GETOP: - cq->op = 0; + cq->op = MISDN_CTRL_HFC_OP; + break; + case MISDN_CTRL_HFC_WD_INIT: /* init the watchdog */ + wd_cnt = cq->p1 & 0xf; + wd_mode = !!(cq->p1 >> 4); + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: MISDN_CTRL_HFC_WD_INIT mode %s counter 0x%x\n", + __func__, wd_mode ? "AUTO" : "MANUAL", wd_cnt); + /* set the watchdog timer */ + HFC_outb(hc, R_TI_WD, poll_timer | (wd_cnt << 4)); + hc->hw.r_bert_wd_md = (wd_mode ? V_AUTO_WD_RES : 0); + if (hc->ctype == HFC_TYPE_XHFC) + hc->hw.r_bert_wd_md |= 0x40 /* V_WD_EN */; + /* init the watchdog register and reset the counter */ + HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES); + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + /* enable the watchdog output for Speech-Design */ + HFC_outb(hc, R_GPIO_SEL, V_GPIO_SEL7); + HFC_outb(hc, R_GPIO_EN1, V_GPIO_EN15); + HFC_outb(hc, R_GPIO_OUT1, 0); + HFC_outb(hc, R_GPIO_OUT1, V_GPIO_OUT15); + } + break; + case MISDN_CTRL_HFC_WD_RESET: /* reset the watchdog counter */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_RESET\n", + __func__); + HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES); break; default: printk(KERN_WARNING "%s: unknown Op %x\n", diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 5da3d95b27f..cf974593a99 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -362,7 +362,8 @@ clear_channelmap(u_int nr, u_char *map) #define MISDN_CTRL_HFC_RECEIVE_ON 0x4006 #define MISDN_CTRL_HFC_ECHOCAN_ON 0x4007 #define MISDN_CTRL_HFC_ECHOCAN_OFF 0x4008 - +#define MISDN_CTRL_HFC_WD_INIT 0x4009 +#define MISDN_CTRL_HFC_WD_RESET 0x400A /* socket options */ #define MISDN_TIME_STAMP 0x0001 -- cgit v1.2.3-70-g09d2 From 7cfa153dd709f15188fe84b78ae76387841fe17b Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Fri, 22 May 2009 11:04:46 +0000 Subject: mISDN: Echo canceler now gets delay information from hardware Added tx-fifo information for calculation of current delay to sync tx and rx streams for echo canceler. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfc_multi.h | 1 + drivers/isdn/hardware/mISDN/hfcmulti.c | 8 +++- drivers/isdn/hardware/mISDN/hfcpci.c | 70 +++++++++++++++++++-------------- drivers/isdn/hardware/mISDN/hfcsusb.c | 4 +- drivers/isdn/mISDN/dsp.h | 2 +- drivers/isdn/mISDN/dsp_core.c | 2 +- drivers/isdn/mISDN/dsp_pipeline.c | 5 ++- drivers/isdn/mISDN/hwchannel.c | 4 +- include/linux/mISDNdsp.h | 3 +- include/linux/mISDNhw.h | 2 +- 10 files changed, 60 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h index 5765e196291..c4878cc712c 100644 --- a/drivers/isdn/hardware/mISDN/hfc_multi.h +++ b/drivers/isdn/hardware/mISDN/hfc_multi.h @@ -44,6 +44,7 @@ struct hfc_chan { int conf; /* conference setting of TX slot */ int txpending; /* if there is currently data in */ /* the FIFO 0=no, 1=yes, 2=splloop */ + int Zfill; /* rx-fifo level on last hfcmulti_tx */ int rx_off; /* set to turn fifo receive off */ int coeff_count; /* curren coeff block */ s32 *coeff; /* memory pointer to 8 coeff blocks */ diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index ca153de6954..3a7c26ce12c 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -1945,6 +1945,9 @@ next_frame: "%d!=%d\n", __func__, hc->id + 1, temp, z2); z2 = temp; /* repeat unti Z2 is equal */ } + hc->chan[ch].Zfill = z1 - z2; + if (hc->chan[ch].Zfill < 0) + hc->chan[ch].Zfill += hc->Zlen; Zspace = z2 - z1; if (Zspace <= 0) Zspace += hc->Zlen; @@ -2031,6 +2034,7 @@ next_frame: /* Have to prep the audio data */ hc->write_fifo(hc, d, ii - i); + hc->chan[ch].Zfill += ii - i; *idxp = ii; /* if not all data has been written */ @@ -2226,7 +2230,7 @@ next_frame: if (dch) recv_Dchannel(dch); else - recv_Bchannel(bch); + recv_Bchannel(bch, MISDN_ID_ANY); *sp = skb; again++; goto next_frame; @@ -2258,7 +2262,7 @@ next_frame: "(z1=%04x, z2=%04x) TRANS\n", __func__, hc->id + 1, ch, Zsize, z1, z2); /* only bch is transparent */ - recv_Bchannel(bch); + recv_Bchannel(bch, hc->chan[ch].Zfill); *sp = skb; } } diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 641a9cd1a53..60dc92562c6 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -452,7 +452,7 @@ hfcpci_empty_bfifo(struct bchannel *bch, struct bzfifo *bz, } bz->za[new_f2].z2 = cpu_to_le16(new_z2); bz->f2 = new_f2; /* next buffer */ - recv_Bchannel(bch); + recv_Bchannel(bch, MISDN_ID_ANY); } } @@ -541,35 +541,45 @@ receive_dmsg(struct hfc_pci *hc) * check for transparent receive data and read max one 'poll' size if avail */ static void -hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *bz, u_char *bdata) +hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *rxbz, + struct bzfifo *txbz, u_char *bdata) { - __le16 *z1r, *z2r; - int new_z2, fcnt, maxlen; - u_char *ptr, *ptr1; + __le16 *z1r, *z2r, *z1t, *z2t; + int new_z2, fcnt_rx, fcnt_tx, maxlen; + u_char *ptr, *ptr1; - z1r = &bz->za[MAX_B_FRAMES].z1; /* pointer to z reg */ + z1r = &rxbz->za[MAX_B_FRAMES].z1; /* pointer to z reg */ z2r = z1r + 1; + z1t = &txbz->za[MAX_B_FRAMES].z1; + z2t = z1t + 1; - fcnt = le16_to_cpu(*z1r) - le16_to_cpu(*z2r); - if (!fcnt) + fcnt_rx = le16_to_cpu(*z1r) - le16_to_cpu(*z2r); + if (!fcnt_rx) return; /* no data avail */ - if (fcnt <= 0) - fcnt += B_FIFO_SIZE; /* bytes actually buffered */ - new_z2 = le16_to_cpu(*z2r) + fcnt; /* new position in fifo */ + if (fcnt_rx <= 0) + fcnt_rx += B_FIFO_SIZE; /* bytes actually buffered */ + new_z2 = le16_to_cpu(*z2r) + fcnt_rx; /* new position in fifo */ if (new_z2 >= (B_FIFO_SIZE + B_SUB_VAL)) new_z2 -= B_FIFO_SIZE; /* buffer wrap */ - if (fcnt > MAX_DATA_SIZE) { /* flush, if oversized */ + if (fcnt_rx > MAX_DATA_SIZE) { /* flush, if oversized */ *z2r = cpu_to_le16(new_z2); /* new position */ return; } - bch->rx_skb = mI_alloc_skb(fcnt, GFP_ATOMIC); + fcnt_tx = le16_to_cpu(*z2t) - le16_to_cpu(*z1t); + if (fcnt_tx <= 0) + fcnt_tx += B_FIFO_SIZE; + /* fcnt_tx contains available bytes in tx-fifo */ + fcnt_tx = B_FIFO_SIZE - fcnt_tx; + /* remaining bytes to send (bytes in tx-fifo) */ + + bch->rx_skb = mI_alloc_skb(fcnt_rx, GFP_ATOMIC); if (bch->rx_skb) { - ptr = skb_put(bch->rx_skb, fcnt); - if (le16_to_cpu(*z2r) + fcnt <= B_FIFO_SIZE + B_SUB_VAL) - maxlen = fcnt; /* complete transfer */ + ptr = skb_put(bch->rx_skb, fcnt_rx); + if (le16_to_cpu(*z2r) + fcnt_rx <= B_FIFO_SIZE + B_SUB_VAL) + maxlen = fcnt_rx; /* complete transfer */ else maxlen = B_FIFO_SIZE + B_SUB_VAL - le16_to_cpu(*z2r); /* maximum */ @@ -577,14 +587,14 @@ hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *bz, u_char *bdata) ptr1 = bdata + (le16_to_cpu(*z2r) - B_SUB_VAL); /* start of data */ memcpy(ptr, ptr1, maxlen); /* copy data */ - fcnt -= maxlen; + fcnt_rx -= maxlen; - if (fcnt) { /* rest remaining */ + if (fcnt_rx) { /* rest remaining */ ptr += maxlen; ptr1 = bdata; /* start of buffer */ - memcpy(ptr, ptr1, fcnt); /* rest */ + memcpy(ptr, ptr1, fcnt_rx); /* rest */ } - recv_Bchannel(bch); + recv_Bchannel(bch, fcnt_tx); /* bch, id */ } else printk(KERN_WARNING "HFCPCI: receive out of memory\n"); @@ -600,26 +610,28 @@ main_rec_hfcpci(struct bchannel *bch) struct hfc_pci *hc = bch->hw; int rcnt, real_fifo; int receive = 0, count = 5; - struct bzfifo *bz; + struct bzfifo *txbz, *rxbz; u_char *bdata; struct zt *zp; if ((bch->nr & 2) && (!hc->hw.bswapped)) { - bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2; + rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2; + txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b2; bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b2; real_fifo = 1; } else { - bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1; + rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1; + txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b1; bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b1; real_fifo = 0; } Begin: count--; - if (bz->f1 != bz->f2) { + if (rxbz->f1 != rxbz->f2) { if (bch->debug & DEBUG_HW_BCHANNEL) printk(KERN_DEBUG "hfcpci rec ch(%x) f1(%d) f2(%d)\n", - bch->nr, bz->f1, bz->f2); - zp = &bz->za[bz->f2]; + bch->nr, rxbz->f1, rxbz->f2); + zp = &rxbz->za[rxbz->f2]; rcnt = le16_to_cpu(zp->z1) - le16_to_cpu(zp->z2); if (rcnt < 0) @@ -630,8 +642,8 @@ Begin: "hfcpci rec ch(%x) z1(%x) z2(%x) cnt(%d)\n", bch->nr, le16_to_cpu(zp->z1), le16_to_cpu(zp->z2), rcnt); - hfcpci_empty_bfifo(bch, bz, bdata, rcnt); - rcnt = bz->f1 - bz->f2; + hfcpci_empty_bfifo(bch, rxbz, bdata, rcnt); + rcnt = rxbz->f1 - rxbz->f2; if (rcnt < 0) rcnt += MAX_B_FRAMES + 1; if (hc->hw.last_bfifo_cnt[real_fifo] > rcnt + 1) { @@ -644,7 +656,7 @@ Begin: else receive = 0; } else if (test_bit(FLG_TRANSPARENT, &bch->Flags)) { - hfcpci_empty_fifo_trans(bch, bz, bdata); + hfcpci_empty_fifo_trans(bch, rxbz, txbz, bdata); return; } else receive = 0; diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 9c427fb204e..6b7704c41b9 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -947,7 +947,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len, if (fifo->dch) recv_Dchannel(fifo->dch); if (fifo->bch) - recv_Bchannel(fifo->bch); + recv_Bchannel(fifo->bch, MISDN_ID_ANY); if (fifo->ech) recv_Echannel(fifo->ech, &hw->dch); @@ -969,7 +969,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len, } else { /* deliver transparent data to layer2 */ if (rx_skb->len >= poll) - recv_Bchannel(fifo->bch); + recv_Bchannel(fifo->bch, MISDN_ID_ANY); } spin_unlock(&hw->lock); } diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h index 0a4a362b89c..4a1c444d73a 100644 --- a/drivers/isdn/mISDN/dsp.h +++ b/drivers/isdn/mISDN/dsp.h @@ -262,5 +262,5 @@ extern int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg); extern void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, int len); extern void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, - int len); + int len, unsigned int txlen); diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 3083338716b..1c49368e0a9 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -710,7 +710,7 @@ dsp_function(struct mISDNchannel *ch, struct sk_buff *skb) /* pipeline */ if (dsp->pipeline.inuse) dsp_pipeline_process_rx(&dsp->pipeline, skb->data, - skb->len); + skb->len, hh->id); /* change volume if requested */ if (dsp->rx_volume) dsp_change_volume(skb, dsp->rx_volume); diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index 18cf87c113e..ac61f198eb3 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -347,7 +347,8 @@ void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, int len) entry->elem->process_tx(entry->p, data, len); } -void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len) +void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len, + unsigned int txlen) { struct dsp_pipeline_entry *entry; @@ -356,7 +357,7 @@ void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len) list_for_each_entry_reverse(entry, &pipeline->list, list) if (entry->elem->process_rx) - entry->elem->process_rx(entry->p, data, len); + entry->elem->process_rx(entry->p, data, len, txlen); } diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c index ab1168a110a..0481a0cdf6d 100644 --- a/drivers/isdn/mISDN/hwchannel.c +++ b/drivers/isdn/mISDN/hwchannel.c @@ -185,13 +185,13 @@ recv_Echannel(struct dchannel *ech, struct dchannel *dch) EXPORT_SYMBOL(recv_Echannel); void -recv_Bchannel(struct bchannel *bch) +recv_Bchannel(struct bchannel *bch, unsigned int id) { struct mISDNhead *hh; hh = mISDN_HEAD_P(bch->rx_skb); hh->prim = PH_DATA_IND; - hh->id = MISDN_ID_ANY; + hh->id = id; if (bch->rcount >= 64) { printk(KERN_WARNING "B-channel %p receive queue overflow, " "fushing!\n", bch); diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h index 6b71d2dce50..2c483d45f14 100644 --- a/include/linux/mISDNdsp.h +++ b/include/linux/mISDNdsp.h @@ -12,7 +12,8 @@ struct mISDN_dsp_element { void *(*new)(const char *arg); void (*free)(void *p); void (*process_tx)(void *p, unsigned char *data, int len); - void (*process_rx)(void *p, unsigned char *data, int len); + void (*process_rx)(void *p, unsigned char *data, int len, + unsigned int txlen); int num_args; struct mISDN_dsp_element_arg *args; diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 97ffdc1d344..ce900f4c245 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -185,7 +185,7 @@ extern int dchannel_senddata(struct dchannel *, struct sk_buff *); extern int bchannel_senddata(struct bchannel *, struct sk_buff *); extern void recv_Dchannel(struct dchannel *); extern void recv_Echannel(struct dchannel *, struct dchannel *); -extern void recv_Bchannel(struct bchannel *); +extern void recv_Bchannel(struct bchannel *, unsigned int id); extern void recv_Dchannel_skb(struct dchannel *, struct sk_buff *); extern void recv_Bchannel_skb(struct bchannel *, struct sk_buff *); extern void confirm_Bsend(struct bchannel *bch); -- cgit v1.2.3-70-g09d2 From e73f6b2260daf02793071e5ce06ea87df762920a Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Fri, 22 May 2009 11:04:48 +0000 Subject: mISDN: Added layer-1-hold feature Add IMHOLD_L1 ioctl. The feature will be disabled on closing. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 21 ++++++++++++++++++--- drivers/isdn/mISDN/tei.c | 43 +++++++++++++++++++++++++++++++------------ include/linux/mISDNif.h | 2 ++ 3 files changed, 51 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 508945d1b9c..530f6897736 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -292,7 +292,7 @@ static int data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p) { struct mISDN_ctrl_req cq; - int err = -EINVAL, val; + int err = -EINVAL, val[2]; struct mISDNchannel *bchan, *next; lock_sock(sk); @@ -328,12 +328,27 @@ data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p) err = -EINVAL; break; } - if (get_user(val, (int __user *)p)) { + val[0] = cmd; + if (get_user(val[1], (int __user *)p)) { err = -EFAULT; break; } err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr, - CONTROL_CHANNEL, &val); + CONTROL_CHANNEL, val); + break; + case IMHOLD_L1: + if (sk->sk_protocol != ISDN_P_LAPD_NT + && sk->sk_protocol != ISDN_P_LAPD_TE) { + err = -EINVAL; + break; + } + val[0] = cmd; + if (get_user(val[1], (int __user *)p)) { + err = -EFAULT; + break; + } + err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr, + CONTROL_CHANNEL, val); break; default: err = -EINVAL; diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index b452dead8fd..c75af762a06 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -122,8 +122,11 @@ da_deactivate(struct FsmInst *fi, int event, void *arg) } read_unlock_irqrestore(&mgr->lock, flags); /* All TEI are inactiv */ - mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 1); - mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING); + if (!test_bit(OPTION_L1_HOLD, &mgr->options)) { + mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, + NULL, 1); + mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING); + } } static void @@ -132,9 +135,11 @@ da_ui(struct FsmInst *fi, int event, void *arg) struct manager *mgr = fi->userdata; /* restart da timer */ - mISDN_FsmDelTimer(&mgr->datimer, 2); - mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 2); - + if (!test_bit(OPTION_L1_HOLD, &mgr->options)) { + mISDN_FsmDelTimer(&mgr->datimer, 2); + mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, + NULL, 2); + } } static void @@ -1103,6 +1108,7 @@ free_teimanager(struct manager *mgr) { struct layer2 *l2, *nl2; + test_and_clear_bit(OPTION_L1_HOLD, &mgr->options); if (test_bit(MGR_OPT_NETWORK, &mgr->options)) { /* not locked lock is taken in release tei */ mgr->up = NULL; @@ -1133,13 +1139,26 @@ static int ctrl_teimanager(struct manager *mgr, void *arg) { /* currently we only have one option */ - int clean = *((int *)arg); - - if (clean) - test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options); - else - test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options); - return 0; + int *val = (int *)arg; + int ret = 0; + + switch (val[0]) { + case IMCLEAR_L2: + if (val[1]) + test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options); + else + test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options); + break; + case IMHOLD_L1: + if (val[1]) + test_and_set_bit(OPTION_L1_HOLD, &mgr->options); + else + test_and_clear_bit(OPTION_L1_HOLD, &mgr->options); + break; + default: + ret = -EINVAL; + } + return ret; } /* This function does create a L2 for fixed TEI in NT Mode */ diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index cf974593a99..0b28fd1e393 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -229,6 +229,7 @@ #define OPTION_L2_PTP 2 #define OPTION_L2_FIXEDTEI 3 #define OPTION_L2_CLEANUP 4 +#define OPTION_L1_HOLD 5 /* should be in sync with linux/kobject.h:KOBJ_NAME_LEN */ #define MISDN_MAX_IDLEN 20 @@ -317,6 +318,7 @@ struct ph_info { #define IMCTRLREQ _IOR('I', 69, int) #define IMCLEAR_L2 _IOR('I', 70, int) #define IMSETDEVNAME _IOR('I', 71, struct mISDN_devrename) +#define IMHOLD_L1 _IOR('I', 72, int) static inline int test_channelmap(u_int nr, u_char *map) -- cgit v1.2.3-70-g09d2 From db9bb63a1b5b65df41d112a8c21adbbfc6a4ac08 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Fri, 22 May 2009 11:04:53 +0000 Subject: mISDN: Add XHFC support for embedded Speech-Design board to hfcmulti New version without emulating arch specific stuff for the other architectures, the special IO and init functions for the 8xx microcontroller are in a separate include file. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/Kconfig | 11 +- drivers/isdn/hardware/mISDN/hfc_multi.h | 45 ++- drivers/isdn/hardware/mISDN/hfc_multi_8xx.h | 167 +++++++++++ drivers/isdn/hardware/mISDN/hfcmulti.c | 416 +++++++++++++++++++--------- drivers/isdn/mISDN/dsp_cmx.c | 4 + include/linux/mISDNdsp.h | 1 + 6 files changed, 500 insertions(+), 144 deletions(-) create mode 100644 drivers/isdn/hardware/mISDN/hfc_multi_8xx.h (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig index fd112ae252c..3024566dd09 100644 --- a/drivers/isdn/hardware/mISDN/Kconfig +++ b/drivers/isdn/hardware/mISDN/Kconfig @@ -13,7 +13,7 @@ config MISDN_HFCPCI config MISDN_HFCMULTI tristate "Support for HFC multiport cards (HFC-4S/8S/E1)" - depends on PCI + depends on PCI || 8xx depends on MISDN help Enable support for cards with Cologne Chip AG's HFC multiport @@ -23,6 +23,15 @@ config MISDN_HFCMULTI * HFC-8S (8 S/T interfaces on one chip) * HFC-E1 (E1 interface for 2Mbit ISDN) +config MISDN_HFCMULTI_8xx + boolean "Support for XHFC embedded board in HFC multiport driver" + depends on MISDN + depends on MISDN_HFCMULTI + depends on 8xx + default 8xx + help + Enable support for the XHFC embedded solution from Speech Design. + config MISDN_HFCUSB tristate "Support for HFC-S USB based TAs" depends on USB diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h index c4878cc712c..0c773866efc 100644 --- a/drivers/isdn/hardware/mISDN/hfc_multi.h +++ b/drivers/isdn/hardware/mISDN/hfc_multi.h @@ -17,6 +17,16 @@ #define PCI_ENA_REGIO 0x01 #define PCI_ENA_MEMIO 0x02 +#define XHFC_IRQ 4 /* SIU_IRQ2 */ +#define XHFC_MEMBASE 0xFE000000 +#define XHFC_MEMSIZE 0x00001000 +#define XHFC_OFFSET 0x00001000 +#define PA_XHFC_A0 0x0020 /* PA10 */ +#define PB_XHFC_IRQ1 0x00000100 /* PB23 */ +#define PB_XHFC_IRQ2 0x00000200 /* PB22 */ +#define PB_XHFC_IRQ3 0x00000400 /* PB21 */ +#define PB_XHFC_IRQ4 0x00000800 /* PB20 */ + /* * NOTE: some registers are assigned multiple times due to different modes * also registers are assigned differen for HFC-4s/8s and HFC-E1 @@ -81,6 +91,11 @@ struct hfcm_hw { #define HFC_CFG_CRC4 10 /* disable CRC-4 Multiframe mode, */ /* use double frame instead. */ +#define HFC_TYPE_E1 1 /* controller is HFC-E1 */ +#define HFC_TYPE_4S 4 /* controller is HFC-4S */ +#define HFC_TYPE_8S 8 /* controller is HFC-8S */ +#define HFC_TYPE_XHFC 5 /* controller is XHFC */ + #define HFC_CHIP_EXRAM_128 0 /* external ram 128k */ #define HFC_CHIP_EXRAM_512 1 /* external ram 256k */ #define HFC_CHIP_REVISION0 2 /* old fifo handling */ @@ -88,19 +103,22 @@ struct hfcm_hw { #define HFC_CHIP_PCM_MASTER 4 /* PCM is master */ #define HFC_CHIP_RX_SYNC 5 /* disable pll sync for pcm */ #define HFC_CHIP_DTMF 6 /* DTMF decoding is enabled */ -#define HFC_CHIP_ULAW 7 /* ULAW mode */ -#define HFC_CHIP_CLOCK2 8 /* double clock mode */ -#define HFC_CHIP_E1CLOCK_GET 9 /* always get clock from E1 interface */ -#define HFC_CHIP_E1CLOCK_PUT 10 /* always put clock from E1 interface */ -#define HFC_CHIP_WATCHDOG 11 /* whether we should send signals */ +#define HFC_CHIP_CONF 7 /* conference handling is enabled */ +#define HFC_CHIP_ULAW 8 /* ULAW mode */ +#define HFC_CHIP_CLOCK2 9 /* double clock mode */ +#define HFC_CHIP_E1CLOCK_GET 10 /* always get clock from E1 interface */ +#define HFC_CHIP_E1CLOCK_PUT 11 /* always put clock from E1 interface */ +#define HFC_CHIP_WATCHDOG 12 /* whether we should send signals */ /* to the watchdog */ -#define HFC_CHIP_B410P 12 /* whether we have a b410p with echocan in */ +#define HFC_CHIP_B410P 13 /* whether we have a b410p with echocan in */ /* hw */ -#define HFC_CHIP_PLXSD 13 /* whether we have a Speech-Design PLX */ +#define HFC_CHIP_PLXSD 14 /* whether we have a Speech-Design PLX */ +#define HFC_CHIP_EMBSD 15 /* whether we have a SD Embedded board */ #define HFC_IO_MODE_PCIMEM 0x00 /* normal memory mapped IO */ #define HFC_IO_MODE_REGIO 0x01 /* PCI io access */ #define HFC_IO_MODE_PLXSD 0x02 /* access HFC via PLX9030 */ +#define HFC_IO_MODE_EMBSD 0x03 /* direct access */ /* table entry in the PCI devices list */ struct hm_map { @@ -113,6 +131,7 @@ struct hm_map { int opticalsupport; int dip_type; int io_mode; + int irq; }; struct hfc_multi { @@ -120,7 +139,7 @@ struct hfc_multi { struct hm_map *mtyp; int id; int pcm; /* id of pcm bus */ - int type; + int ctype; /* controller type */ int ports; u_int irq; /* irq used by card */ @@ -160,10 +179,16 @@ struct hfc_multi { int len); void (*write_fifo)(struct hfc_multi *hc, u_char *data, int len); - u_long pci_origmembase, plx_origmembase, dsp_origmembase; + u_long pci_origmembase, plx_origmembase; void __iomem *pci_membase; /* PCI memory */ void __iomem *plx_membase; /* PLX memory */ - u_char *dsp_membase; /* DSP on PLX */ + u_long xhfc_origmembase; + u_char *xhfc_membase; + u_long *xhfc_memaddr, *xhfc_memdata; +#ifdef CONFIG_MISDN_HFCMULTI_8xx + struct immap *immap; +#endif + u_long pb_irqmsk; /* Portbit mask to check the IRQ line */ u_long pci_iobase; /* PCI IO */ struct hfcm_hw hw; /* remember data of write-only-registers */ diff --git a/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h b/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h new file mode 100644 index 00000000000..45ddced956d --- /dev/null +++ b/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h @@ -0,0 +1,167 @@ +/* + * For License see notice in hfc_multi.c + * + * special IO and init functions for the embedded XHFC board + * from Speech Design + * + */ + +#include + +/* Change this to the value used by your board */ +#ifndef IMAP_ADDR +#define IMAP_ADDR 0xFFF00000 +#endif + +static void +#ifdef HFC_REGISTER_DEBUG +HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val, + const char *function, int line) +#else +HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val) +#endif +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + writeb(reg, hc->xhfc_memaddr); + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + writeb(val, hc->xhfc_memdata); +} +static u_char +#ifdef HFC_REGISTER_DEBUG +HFC_inb_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inb_embsd(struct hfc_multi *hc, u_char reg) +#endif +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + writeb(reg, hc->xhfc_memaddr); + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + return readb(hc->xhfc_memdata); +} +static u_short +#ifdef HFC_REGISTER_DEBUG +HFC_inw_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inw_embsd(struct hfc_multi *hc, u_char reg) +#endif +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + writeb(reg, hc->xhfc_memaddr); + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + return readb(hc->xhfc_memdata); +} +static void +#ifdef HFC_REGISTER_DEBUG +HFC_wait_embsd(struct hfc_multi *hc, const char *function, int line) +#else +HFC_wait_embsd(struct hfc_multi *hc) +#endif +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + writeb(R_STATUS, hc->xhfc_memaddr); + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + while (readb(hc->xhfc_memdata) & V_BUSY) + cpu_relax(); +} + +/* write fifo data (EMBSD) */ +void +write_fifo_embsd(struct hfc_multi *hc, u_char *data, int len) +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + *hc->xhfc_memaddr = A_FIFO_DATA0; + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + while (len) { + *hc->xhfc_memdata = *data; + data++; + len--; + } +} + +/* read fifo data (EMBSD) */ +void +read_fifo_embsd(struct hfc_multi *hc, u_char *data, int len) +{ + hc->immap->im_ioport.iop_padat |= PA_XHFC_A0; + *hc->xhfc_memaddr = A_FIFO_DATA0; + hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0); + while (len) { + *data = (u_char)(*hc->xhfc_memdata); + data++; + len--; + } +} + +static int +setup_embedded(struct hfc_multi *hc, struct hm_map *m) +{ + printk(KERN_INFO + "HFC-multi: card manufacturer: '%s' card name: '%s' clock: %s\n", + m->vendor_name, m->card_name, m->clock2 ? "double" : "normal"); + + hc->pci_dev = NULL; + if (m->clock2) + test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); + + hc->leds = m->leds; + hc->ledstate = 0xAFFEAFFE; + hc->opticalsupport = m->opticalsupport; + + hc->pci_iobase = 0; + hc->pci_membase = 0; + hc->xhfc_membase = NULL; + hc->xhfc_memaddr = NULL; + hc->xhfc_memdata = NULL; + + /* set memory access methods */ + if (m->io_mode) /* use mode from card config */ + hc->io_mode = m->io_mode; + switch (hc->io_mode) { + case HFC_IO_MODE_EMBSD: + test_and_set_bit(HFC_CHIP_EMBSD, &hc->chip); + hc->slots = 128; /* required */ + /* fall through */ + hc->HFC_outb = HFC_outb_embsd; + hc->HFC_inb = HFC_inb_embsd; + hc->HFC_inw = HFC_inw_embsd; + hc->HFC_wait = HFC_wait_embsd; + hc->read_fifo = read_fifo_embsd; + hc->write_fifo = write_fifo_embsd; + hc->xhfc_origmembase = XHFC_MEMBASE + XHFC_OFFSET * hc->id; + hc->xhfc_membase = (u_char *)ioremap(hc->xhfc_origmembase, + XHFC_MEMSIZE); + if (!hc->xhfc_membase) { + printk(KERN_WARNING + "HFC-multi: failed to remap xhfc address space. " + "(internal error)\n"); + return -EIO; + } + hc->xhfc_memaddr = (u_long *)(hc->xhfc_membase + 4); + hc->xhfc_memdata = (u_long *)(hc->xhfc_membase); + printk(KERN_INFO + "HFC-multi: xhfc_membase:%#lx xhfc_origmembase:%#lx " + "xhfc_memaddr:%#lx xhfc_memdata:%#lx\n", + (u_long)hc->xhfc_membase, hc->xhfc_origmembase, + (u_long)hc->xhfc_memaddr, (u_long)hc->xhfc_memdata); + break; + default: + printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n"); + return -EIO; + } + + /* Prepare the MPC8XX PortA 10 as output (address/data selector) */ + hc->immap = (struct immap *)(IMAP_ADDR); + hc->immap->im_ioport.iop_papar &= ~(PA_XHFC_A0); + hc->immap->im_ioport.iop_paodr &= ~(PA_XHFC_A0); + hc->immap->im_ioport.iop_padir |= PA_XHFC_A0; + + /* Prepare the MPC8xx PortB __X__ as input (ISDN__X__IRQ) */ + hc->pb_irqmsk = (PB_XHFC_IRQ1 << hc->id); + hc->immap->im_cpm.cp_pbpar &= ~(hc->pb_irqmsk); + hc->immap->im_cpm.cp_pbodr &= ~(hc->pb_irqmsk); + hc->immap->im_cpm.cp_pbdir &= ~(hc->pb_irqmsk); + + /* At this point the needed config is done */ + /* fifos are still not enabled */ + return 0; +} diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index bc0d3efeb56..5be4edf631d 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -139,6 +139,10 @@ * Selects interface with clock source for mISDN and applications. * Set to card number starting with 1. Set to -1 to disable. * By default, the first card is used as clock source. + * + * hwid: + * NOTE: only one hwid value must be given once + * Enable special embedded devices with XHFC controllers. */ /* @@ -206,6 +210,11 @@ static int clock; static uint timer; static uint clockdelay_te = CLKDEL_TE; static uint clockdelay_nt = CLKDEL_NT; +#define HWID_NONE 0 +#define HWID_MINIP4 1 +#define HWID_MINIP8 2 +#define HWID_MINIP16 3 +static uint hwid = HWID_NONE; static int HFC_cnt, Port_cnt, PCM_cnt = 99; @@ -223,6 +232,7 @@ module_param_array(pcm, int, NULL, S_IRUGO | S_IWUSR); module_param_array(dslot, int, NULL, S_IRUGO | S_IWUSR); module_param_array(iomode, uint, NULL, S_IRUGO | S_IWUSR); module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR); +module_param(hwid, uint, S_IRUGO | S_IWUSR); /* The hardware ID */ #ifdef HFC_REGISTER_DEBUG #define HFC_outb(hc, reg, val) \ @@ -252,6 +262,10 @@ module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR); #define HFC_wait_nodebug(hc) (hc->HFC_wait_nodebug(hc)) #endif +#ifdef CONFIG_MISDN_HFCMULTI_8xx +#include "hfc_multi_8xx.h" +#endif + /* HFC_IO_MODE_PCIMEM */ static void #ifdef HFC_REGISTER_DEBUG @@ -928,7 +942,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) writel(pv, plx_acc_32); if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) { pcmmaster = hc; - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (debug & DEBUG_HFCMULTI_PLXSD) printk(KERN_DEBUG "Schedule SYNC_I\n"); @@ -949,7 +963,8 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) pv |= PLX_SYNC_O_EN; writel(pv, plx_acc_32); /* switch to jatt PLL, if not disabled by RX_SYNC */ - if (hc->type == 1 && !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) { + if (hc->ctype == HFC_TYPE_E1 + && !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) { if (debug & DEBUG_HFCMULTI_PLXSD) printk(KERN_DEBUG "Schedule jatt PLL\n"); hc->e1_resync |= 2; /* switch to jatt */ @@ -961,7 +976,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) printk(KERN_DEBUG "id=%d (0x%p) = PCM master syncronized " "with QUARTZ\n", hc->id, hc); - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { /* Use the crystal clock for the PCM master card */ if (debug & DEBUG_HFCMULTI_PLXSD) @@ -972,7 +987,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) if (debug & DEBUG_HFCMULTI_PLXSD) printk(KERN_DEBUG "QUARTZ is automatically " - "enabled by HFC-%dS\n", hc->type); + "enabled by HFC-%dS\n", hc->ctype); } plx_acc_32 = hc->plx_membase + PLX_GPIOC; pv = readl(plx_acc_32); @@ -1060,13 +1075,16 @@ release_io_hfcmulti(struct hfc_multi *hc) /* disable memory mapped ports / io ports */ test_and_clear_bit(HFC_CHIP_PLXSD, &hc->chip); /* prevent resync */ - pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0); + if (hc->pci_dev) + pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0); if (hc->pci_membase) iounmap(hc->pci_membase); if (hc->plx_membase) iounmap(hc->plx_membase); if (hc->pci_iobase) release_region(hc->pci_iobase, 8); + if (hc->xhfc_membase) + iounmap((void *)hc->xhfc_membase); if (hc->pci_dev) { pci_disable_device(hc->pci_dev); @@ -1100,8 +1118,9 @@ init_chip(struct hfc_multi *hc) /* revision check */ if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: entered\n", __func__); - val = HFC_inb(hc, R_CHIP_ID)>>4; - if (val != 0x8 && val != 0xc && val != 0xe) { + val = HFC_inb(hc, R_CHIP_ID); + if ((val>>4) != 0x8 && (val>>4) != 0xc && (val>>4) != 0xe + && (val>>1) != 0x31) { printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val); err = -EIO; goto out; @@ -1109,8 +1128,9 @@ init_chip(struct hfc_multi *hc) rev = HFC_inb(hc, R_CHIP_RV); printk(KERN_INFO "HFC_multi: detected HFC with chip ID=0x%lx revision=%ld%s\n", - val, rev, (rev == 0) ? " (old FIFO handling)" : ""); - if (rev == 0) { + val, rev, (rev == 0 && (hc->ctype != HFC_TYPE_XHFC)) ? + " (old FIFO handling)" : ""); + if (hc->ctype != HFC_TYPE_XHFC && rev == 0) { test_and_set_bit(HFC_CHIP_REVISION0, &hc->chip); printk(KERN_WARNING "HFC_multi: NOTE: Your chip is revision 0, " @@ -1152,6 +1172,12 @@ init_chip(struct hfc_multi *hc) hc->Zlen = 8000; hc->DTMFbase = 0x2000; } + if (hc->ctype == HFC_TYPE_XHFC) { + hc->Flen = 0x8; + hc->Zmin = 0x0; + hc->Zlen = 64; + hc->DTMFbase = 0x0; + } hc->max_trans = poll << 1; if (hc->max_trans > hc->Zlen) hc->max_trans = hc->Zlen; @@ -1211,6 +1237,9 @@ init_chip(struct hfc_multi *hc) hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */ } + if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) + hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */ + /* we only want the real Z2 read-pointer for revision > 0 */ if (!test_bit(HFC_CHIP_REVISION0, &hc->chip)) hc->hw.r_ram_sz |= V_FZ_MD; @@ -1234,15 +1263,24 @@ init_chip(struct hfc_multi *hc) /* soft reset */ HFC_outb(hc, R_CTRL, hc->hw.r_ctrl); - HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, 0x0C /* R_FIFO_THRES */, + 0x11 /* 16 Bytes TX/RX */); + else + HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); HFC_outb(hc, R_FIFO_MD, 0); - hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES | V_RLD_EPR; + if (hc->ctype == HFC_TYPE_XHFC) + hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES; + else + hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES + | V_RLD_EPR; HFC_outb(hc, R_CIRM, hc->hw.r_cirm); udelay(100); hc->hw.r_cirm = 0; HFC_outb(hc, R_CIRM, hc->hw.r_cirm); udelay(100); - HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); + if (hc->ctype != HFC_TYPE_XHFC) + HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); /* Speech Design PLX bridge pcm and sync mode */ if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { @@ -1278,13 +1316,16 @@ init_chip(struct hfc_multi *hc) HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0xa0); if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) HFC_outb(hc, R_PCM_MD2, V_SYNC_SRC); /* sync via SYNC_I / O */ + else if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) + HFC_outb(hc, R_PCM_MD2, 0x10); /* V_C2O_EN */ else HFC_outb(hc, R_PCM_MD2, 0x00); /* sync from interface */ HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00); for (i = 0; i < 256; i++) { HFC_outb_nodebug(hc, R_SLOT, i); HFC_outb_nodebug(hc, A_SL_CFG, 0); - HFC_outb_nodebug(hc, A_CONF, 0); + if (hc->ctype != HFC_TYPE_XHFC) + HFC_outb_nodebug(hc, A_CONF, 0); hc->slot_owner[i] = -1; } @@ -1296,6 +1337,9 @@ init_chip(struct hfc_multi *hc) HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK); } + if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) + HFC_outb(hc, 0x02 /* R_CLK_CFG */, 0x40 /* V_CLKO_OFF */); + /* B410P GPIO */ if (test_bit(HFC_CHIP_B410P, &hc->chip)) { printk(KERN_NOTICE "Setting GPIOs\n"); @@ -1424,7 +1468,7 @@ controller_fail: hc->hw.r_irqmsk_misc |= V_TI_IRQMSK; /* set E1 state machine IRQ */ - if (hc->type == 1) + if (hc->ctype == HFC_TYPE_E1) hc->hw.r_irqmsk_misc |= V_STA_IRQMSK; /* set DTMF detection */ @@ -1444,7 +1488,8 @@ controller_fail: r_conf_en = V_CONF_EN | V_ULAW; else r_conf_en = V_CONF_EN; - HFC_outb(hc, R_CONF_EN, r_conf_en); + if (hc->ctype != HFC_TYPE_XHFC) + HFC_outb(hc, R_CONF_EN, r_conf_en); /* setting leds */ switch (hc->leds) { @@ -1468,16 +1513,23 @@ controller_fail: break; } + if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) { + hc->hw.r_st_sync = 0x10; /* V_AUTO_SYNCI */ + HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync); + } + /* set master clock */ if (hc->masterclk >= 0) { if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: setting ST master clock " "to port %d (0..%d)\n", __func__, hc->masterclk, hc->ports-1); - hc->hw.r_st_sync = hc->masterclk | V_AUTO_SYNC; + hc->hw.r_st_sync |= (hc->masterclk | V_AUTO_SYNC); HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync); } + + /* setting misc irq */ HFC_outb(hc, R_IRQMSK_MISC, hc->hw.r_irqmsk_misc); if (debug & DEBUG_HFCMULTI_INIT) @@ -1929,7 +1981,7 @@ next_frame: Fspace = 1; } /* one frame only for ST D-channels, to allow resending */ - if (hc->type != 1 && dch) { + if (hc->ctype != HFC_TYPE_E1 && dch) { if (f1 != f2) Fspace = 0; } @@ -1971,12 +2023,22 @@ next_frame: "slot_tx %d\n", __func__, ch, slot_tx); /* connect slot */ - HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 | - V_HDLC_TRP | V_IFF); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, 0xc0 + | 0x07 << 2 | V_HDLC_TRP | V_IFF); + /* Enable FIFO, no interrupt */ + else + HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 | + V_HDLC_TRP | V_IFF); HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1); HFC_wait_nodebug(hc); - HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 | - V_HDLC_TRP | V_IFF); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, 0xc0 + | 0x07 << 2 | V_HDLC_TRP | V_IFF); + /* Enable FIFO, no interrupt */ + else + HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 | + V_HDLC_TRP | V_IFF); HFC_outb_nodebug(hc, R_FIFO, ch<<1); HFC_wait_nodebug(hc); } @@ -2004,10 +2066,22 @@ next_frame: "FIFO data: channel %d slot_tx %d\n", __func__, ch, slot_tx); /* disconnect slot */ - HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, 0x80 + | 0x07 << 2 | V_HDLC_TRP | V_IFF); + /* Enable FIFO, no interrupt */ + else + HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | + V_HDLC_TRP | V_IFF); HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1); HFC_wait_nodebug(hc); - HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, 0x80 + | 0x07 << 2 | V_HDLC_TRP | V_IFF); + /* Enable FIFO, no interrupt */ + else + HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | + V_HDLC_TRP | V_IFF); HFC_outb_nodebug(hc, R_FIFO, ch<<1); HFC_wait_nodebug(hc); } @@ -2327,7 +2401,7 @@ handle_timer_irq(struct hfc_multi *hc) spin_unlock_irqrestore(&HFClock, flags); } - if (hc->type != 1 || hc->e1_state == 1) + if (hc->ctype != HFC_TYPE_E1 || hc->e1_state == 1) for (ch = 0; ch <= 31; ch++) { if (hc->created[hc->chan[ch].port]) { hfcmulti_tx(hc, ch); @@ -2350,7 +2424,7 @@ handle_timer_irq(struct hfc_multi *hc) } } } - if (hc->type == 1 && hc->created[0]) { + if (hc->ctype == HFC_TYPE_E1 && hc->created[0]) { dch = hc->chan[hc->dslot].dch; if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dslot].cfg)) { /* LOS */ @@ -2610,7 +2684,10 @@ hfcmulti_interrupt(int intno, void *dev_id) "card %d, this is no bug.\n", hc->id + 1, irqsem); irqsem = hc->id + 1; #endif - +#ifdef CONFIG_MISDN_HFCMULTI_8xx + if (hc->immap->im_cpm.cp_pbdat & hc->pb_irqmsk) + goto irq_notforus; +#endif if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { spin_lock_irqsave(&plx_lock, flags); plx_acc = hc->plx_membase + PLX_INTCSR; @@ -2650,7 +2727,7 @@ hfcmulti_interrupt(int intno, void *dev_id) } hc->irqcnt++; if (r_irq_statech) { - if (hc->type != 1) + if (hc->ctype != HFC_TYPE_E1) ph_state_irq(hc, r_irq_statech); } if (status & V_EXT_IRQSTA) @@ -2664,7 +2741,7 @@ hfcmulti_interrupt(int intno, void *dev_id) r_irq_misc = HFC_inb_nodebug(hc, R_IRQ_MISC); r_irq_misc &= hc->hw.r_irqmsk_misc; /* ignore disabled irqs */ if (r_irq_misc & V_STA_IRQ) { - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { /* state machine */ dch = hc->chan[hc->dslot].dch; e1_syncsta = HFC_inb_nodebug(hc, R_SYNC_STA); @@ -2786,7 +2863,8 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, if (hc->slot_owner[oslot_tx<<1] == ch) { HFC_outb(hc, R_SLOT, oslot_tx << 1); HFC_outb(hc, A_SL_CFG, 0); - HFC_outb(hc, A_CONF, 0); + if (hc->ctype != HFC_TYPE_XHFC) + HFC_outb(hc, A_CONF, 0); hc->slot_owner[oslot_tx<<1] = -1; } else { if (debug & DEBUG_HFCMULTI_MODE) @@ -2839,7 +2917,9 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, flow_tx, routing, conf); HFC_outb(hc, R_SLOT, slot_tx << 1); HFC_outb(hc, A_SL_CFG, (ch<<1) | routing); - HFC_outb(hc, A_CONF, (conf < 0) ? 0 : (conf | V_CONF_SL)); + if (hc->ctype != HFC_TYPE_XHFC) + HFC_outb(hc, A_CONF, + (conf < 0) ? 0 : (conf | V_CONF_SL)); hc->slot_owner[slot_tx << 1] = ch; hc->chan[ch].slot_tx = slot_tx; hc->chan[ch].bank_tx = bank_tx; @@ -2889,7 +2969,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, HFC_outb(hc, A_IRQ_MSK, 0); HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); HFC_wait(hc); - if (hc->chan[ch].bch && hc->type != 1) { + if (hc->chan[ch].bch && hc->ctype != HFC_TYPE_E1) { hc->hw.a_st_ctrl0[hc->chan[ch].port] &= ((ch & 0x3) == 0)? ~V_B1_EN: ~V_B2_EN; HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); @@ -2965,8 +3045,13 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, /* enable TX fifo */ HFC_outb(hc, R_FIFO, ch << 1); HFC_wait(hc); - HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 | - V_HDLC_TRP | V_IFF); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x07 << 2 | + V_HDLC_TRP | V_IFF); + /* Enable FIFO, no interrupt */ + else + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 | + V_HDLC_TRP | V_IFF); HFC_outb(hc, A_SUBCH_CFG, 0); HFC_outb(hc, A_IRQ_MSK, 0); HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); @@ -2976,13 +3061,19 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, /* enable RX fifo */ HFC_outb(hc, R_FIFO, (ch<<1)|1); HFC_wait(hc); - HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 | V_HDLC_TRP); + if (hc->ctype == HFC_TYPE_XHFC) + HFC_outb(hc, A_CON_HDLC, flow_rx | 0x07 << 2 | + V_HDLC_TRP); + /* Enable FIFO, no interrupt*/ + else + HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 | + V_HDLC_TRP); HFC_outb(hc, A_SUBCH_CFG, 0); HFC_outb(hc, A_IRQ_MSK, 0); HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); HFC_wait(hc); } - if (hc->type != 1) { + if (hc->ctype != HFC_TYPE_E1) { hc->hw.a_st_ctrl0[hc->chan[ch].port] |= ((ch & 0x3) == 0) ? V_B1_EN : V_B2_EN; HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); @@ -3003,7 +3094,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, /* enable TX fifo */ HFC_outb(hc, R_FIFO, ch<<1); HFC_wait(hc); - if (hc->type == 1 || hc->chan[ch].bch) { + if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch) { /* E1 or B-channel */ HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04); HFC_outb(hc, A_SUBCH_CFG, 0); @@ -3019,7 +3110,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, HFC_outb(hc, R_FIFO, (ch<<1)|1); HFC_wait(hc); HFC_outb(hc, A_CON_HDLC, flow_rx | 0x04); - if (hc->type == 1 || hc->chan[ch].bch) + if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch) HFC_outb(hc, A_SUBCH_CFG, 0); /* full 8 bits */ else HFC_outb(hc, A_SUBCH_CFG, 2); /* 2 bits dchannel */ @@ -3028,7 +3119,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, HFC_wait(hc); if (hc->chan[ch].bch) { test_and_set_bit(FLG_HDLC, &hc->chan[ch].bch->Flags); - if (hc->type != 1) { + if (hc->ctype != HFC_TYPE_E1) { hc->hw.a_st_ctrl0[hc->chan[ch].port] |= ((ch&0x3) == 0) ? V_B1_EN : V_B2_EN; HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); @@ -3108,7 +3199,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) case HW_RESET_REQ: /* start activation */ spin_lock_irqsave(&hc->lock, flags); - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (debug & DEBUG_HFCMULTI_MSG) printk(KERN_DEBUG "%s: HW_RESET_REQ no BRI\n", @@ -3129,7 +3220,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) case HW_DEACT_REQ: /* start deactivation */ spin_lock_irqsave(&hc->lock, flags); - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (debug & DEBUG_HFCMULTI_MSG) printk(KERN_DEBUG "%s: HW_DEACT_REQ no BRI\n", @@ -3163,7 +3254,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) break; case HW_POWERUP_REQ: spin_lock_irqsave(&hc->lock, flags); - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (debug & DEBUG_HFCMULTI_MSG) printk(KERN_DEBUG "%s: HW_POWERUP_REQ no BRI\n", @@ -3240,7 +3331,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) __func__, hc->chan[dch->slot].port, hc->ports-1); /* start activation */ - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { ph_state_change(dch); if (debug & DEBUG_HFCMULTI_STATE) printk(KERN_DEBUG @@ -3273,7 +3364,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) __func__, hc->chan[dch->slot].port, hc->ports-1); /* start deactivation */ - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (debug & DEBUG_HFCMULTI_MSG) printk(KERN_DEBUG "%s: PH_DEACTIVATE no BRI\n", @@ -3493,6 +3584,8 @@ channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq) features->hfc_id = hc->id; if (test_bit(HFC_CHIP_DTMF, &hc->chip)) features->hfc_dtmf = 1; + if (test_bit(HFC_CHIP_CONF, &hc->chip)) + features->hfc_conf = 1; features->hfc_loops = 0; if (test_bit(HFC_CHIP_B410P, &hc->chip)) { features->hfc_echocanhw = 1; @@ -3630,7 +3723,7 @@ ph_state_change(struct dchannel *dch) hc = dch->hw; ch = dch->slot; - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { if (dch->dev.D.protocol == ISDN_P_TE_E1) { if (debug & DEBUG_HFCMULTI_STATE) printk(KERN_DEBUG @@ -3755,7 +3848,7 @@ hfcmulti_initmode(struct dchannel *dch) if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: entered\n", __func__); - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { hc->chan[hc->dslot].slot_tx = -1; hc->chan[hc->dslot].slot_rx = -1; hc->chan[hc->dslot].conf = -1; @@ -3904,6 +3997,11 @@ hfcmulti_initmode(struct dchannel *dch) } if (!test_bit(HFC_CFG_NONCAP_TX, &hc->chan[i].cfg)) hc->hw.a_st_ctrl0[pt] |= V_TX_LI; + if (hc->ctype == HFC_TYPE_XHFC) { + hc->hw.a_st_ctrl0[pt] |= 0x40 /* V_ST_PU_CTRL */; + HFC_outb(hc, 0x35 /* A_ST_CTRL3 */, + 0x7c << 1 /* V_ST_PULSE */); + } /* line setup */ HFC_outb(hc, A_ST_CTRL0, hc->hw.a_st_ctrl0[pt]); /* disable E-channel */ @@ -3990,7 +4088,7 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch, return -EINVAL; if (rq->protocol == ISDN_P_NONE) return -EINVAL; - if (hc->type == 1) + if (hc->ctype == HFC_TYPE_E1) ch = rq->adr.channel; else ch = (rq->adr.channel - 1) + (dch->slot - 2); @@ -4081,7 +4179,7 @@ hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg) switch (rq->protocol) { case ISDN_P_TE_S0: case ISDN_P_NT_S0: - if (hc->type == 1) { + if (hc->ctype == HFC_TYPE_E1) { err = -EINVAL; break; } @@ -4089,7 +4187,7 @@ hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg) break; case ISDN_P_TE_E1: case ISDN_P_NT_E1: - if (hc->type != 1) { + if (hc->ctype != HFC_TYPE_E1) { err = -EINVAL; break; } @@ -4156,13 +4254,13 @@ init_card(struct hfc_multi *hc) disable_hwirq(hc); spin_unlock_irqrestore(&hc->lock, flags); - if (request_irq(hc->pci_dev->irq, hfcmulti_interrupt, IRQF_SHARED, + if (request_irq(hc->irq, hfcmulti_interrupt, IRQF_SHARED, "HFC-multi", hc)) { printk(KERN_WARNING "mISDN: Could not get interrupt %d.\n", - hc->pci_dev->irq); + hc->irq); + hc->irq = 0; return -EIO; } - hc->irq = hc->pci_dev->irq; if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { spin_lock_irqsave(&plx_lock, plx_flags); @@ -4269,6 +4367,10 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, hc->ledstate = 0xAFFEAFFE; hc->opticalsupport = m->opticalsupport; + hc->pci_iobase = 0; + hc->pci_membase = NULL; + hc->plx_membase = NULL; + /* set memory access methods */ if (m->io_mode) /* use mode from card config */ hc->io_mode = m->io_mode; @@ -4276,44 +4378,12 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, case HFC_IO_MODE_PLXSD: test_and_set_bit(HFC_CHIP_PLXSD, &hc->chip); hc->slots = 128; /* required */ - /* fall through */ - case HFC_IO_MODE_PCIMEM: hc->HFC_outb = HFC_outb_pcimem; hc->HFC_inb = HFC_inb_pcimem; hc->HFC_inw = HFC_inw_pcimem; hc->HFC_wait = HFC_wait_pcimem; hc->read_fifo = read_fifo_pcimem; hc->write_fifo = write_fifo_pcimem; - break; - case HFC_IO_MODE_REGIO: - hc->HFC_outb = HFC_outb_regio; - hc->HFC_inb = HFC_inb_regio; - hc->HFC_inw = HFC_inw_regio; - hc->HFC_wait = HFC_wait_regio; - hc->read_fifo = read_fifo_regio; - hc->write_fifo = write_fifo_regio; - break; - default: - printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n"); - pci_disable_device(hc->pci_dev); - return -EIO; - } - hc->HFC_outb_nodebug = hc->HFC_outb; - hc->HFC_inb_nodebug = hc->HFC_inb; - hc->HFC_inw_nodebug = hc->HFC_inw; - hc->HFC_wait_nodebug = hc->HFC_wait; -#ifdef HFC_REGISTER_DEBUG - hc->HFC_outb = HFC_outb_debug; - hc->HFC_inb = HFC_inb_debug; - hc->HFC_inw = HFC_inw_debug; - hc->HFC_wait = HFC_wait_debug; -#endif - hc->pci_iobase = 0; - hc->pci_membase = NULL; - hc->plx_membase = NULL; - - switch (hc->io_mode) { - case HFC_IO_MODE_PLXSD: hc->plx_origmembase = hc->pci_dev->resource[0].start; /* MEMBASE 1 is PLX PCI Bridge */ @@ -4361,6 +4431,12 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO); break; case HFC_IO_MODE_PCIMEM: + hc->HFC_outb = HFC_outb_pcimem; + hc->HFC_inb = HFC_inb_pcimem; + hc->HFC_inw = HFC_inw_pcimem; + hc->HFC_wait = HFC_wait_pcimem; + hc->read_fifo = read_fifo_pcimem; + hc->write_fifo = write_fifo_pcimem; hc->pci_origmembase = hc->pci_dev->resource[1].start; if (!hc->pci_origmembase) { printk(KERN_WARNING @@ -4377,12 +4453,18 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, pci_disable_device(hc->pci_dev); return -EIO; } - printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ %d " - "HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase, + printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ " + "%d HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase, hc->pci_origmembase, hc->pci_dev->irq, HZ, hc->leds); pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO); break; case HFC_IO_MODE_REGIO: + hc->HFC_outb = HFC_outb_regio; + hc->HFC_inb = HFC_inb_regio; + hc->HFC_inw = HFC_inw_regio; + hc->HFC_wait = HFC_wait_regio; + hc->read_fifo = read_fifo_regio; + hc->write_fifo = write_fifo_regio; hc->pci_iobase = (u_int) hc->pci_dev->resource[0].start; if (!hc->pci_iobase) { printk(KERN_WARNING @@ -4464,7 +4546,7 @@ release_port(struct hfc_multi *hc, struct dchannel *dch) dch->timer.function = NULL; } - if (hc->type == 1) { /* E1 */ + if (hc->ctype == HFC_TYPE_E1) { /* E1 */ /* remove sync */ if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { hc->syncronized = 0; @@ -4863,9 +4945,15 @@ init_multi_port(struct hfc_multi *hc, int pt) test_and_set_bit(HFC_CFG_DIS_ECHANNEL, &hc->chan[i + 2].cfg); } - snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d-%d", - hc->type, HFC_cnt + 1, pt + 1); - ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name); + if (hc->ctype == HFC_TYPE_XHFC) { + snprintf(name, MISDN_MAX_IDLEN - 1, "xhfc.%d-%d", + HFC_cnt + 1, pt + 1); + ret = mISDN_register_device(&dch->dev, NULL, name); + } else { + snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d-%d", + hc->ctype, HFC_cnt + 1, pt + 1); + ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name); + } if (ret) goto free_chan; hc->created[pt] = 1; @@ -4876,9 +4964,9 @@ free_chan: } static int -hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) +hfcmulti_init(struct hm_map *m, struct pci_dev *pdev, + const struct pci_device_id *ent) { - struct hm_map *m = (struct hm_map *)ent->driver_data; int ret_err = 0; int pt; struct hfc_multi *hc; @@ -4913,16 +5001,18 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) } spin_lock_init(&hc->lock); hc->mtyp = m; - hc->type = m->type; + hc->ctype = m->type; hc->ports = m->ports; hc->id = HFC_cnt; hc->pcm = pcm[HFC_cnt]; hc->io_mode = iomode[HFC_cnt]; - if (dslot[HFC_cnt] < 0 && hc->type == 1) { + if (dslot[HFC_cnt] < 0 && hc->ctype == HFC_TYPE_E1) { hc->dslot = 0; printk(KERN_INFO "HFC-E1 card has disabled D-channel, but " "31 B-channels\n"); - } if (dslot[HFC_cnt] > 0 && dslot[HFC_cnt] < 32 && hc->type == 1) { + } + if (dslot[HFC_cnt] > 0 && dslot[HFC_cnt] < 32 + && hc->ctype == HFC_TYPE_E1) { hc->dslot = dslot[HFC_cnt]; printk(KERN_INFO "HFC-E1 card has alternating D-channel on " "time slot %d\n", dslot[HFC_cnt]); @@ -4944,8 +5034,11 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < (poll >> 1); i++) hc->silence_data[i] = hc->silence; - if (!(type[HFC_cnt] & 0x200)) - test_and_set_bit(HFC_CHIP_DTMF, &hc->chip); + if (hc->ctype != HFC_TYPE_XHFC) { + if (!(type[HFC_cnt] & 0x200)) + test_and_set_bit(HFC_CHIP_DTMF, &hc->chip); + test_and_set_bit(HFC_CHIP_CONF, &hc->chip); + } if (type[HFC_cnt] & 0x800) test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); @@ -4969,8 +5062,18 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_NOTICE "Watchdog enabled\n"); } - /* setup pci, hc->slots may change due to PLXSD */ - ret_err = setup_pci(hc, pdev, ent); + if (pdev && ent) + /* setup pci, hc->slots may change due to PLXSD */ + ret_err = setup_pci(hc, pdev, ent); + else +#ifdef CONFIG_MISDN_HFCMULTI_8xx + ret_err = setup_embedded(hc, m); +#else + { + printk(KERN_WARNING "Embedded IO Mode not selected\n"); + ret_err = -EIO; + } +#endif if (ret_err) { if (hc == syncmaster) syncmaster = NULL; @@ -4978,7 +5081,17 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) return ret_err; } - /* crate channels */ + hc->HFC_outb_nodebug = hc->HFC_outb; + hc->HFC_inb_nodebug = hc->HFC_inb; + hc->HFC_inw_nodebug = hc->HFC_inw; + hc->HFC_wait_nodebug = hc->HFC_wait; +#ifdef HFC_REGISTER_DEBUG + hc->HFC_outb = HFC_outb_debug; + hc->HFC_inb = HFC_inb_debug; + hc->HFC_inw = HFC_inw_debug; + hc->HFC_wait = HFC_wait_debug; +#endif + /* create channels */ for (pt = 0; pt < hc->ports; pt++) { if (Port_cnt >= MAX_PORTS) { printk(KERN_ERR "too many ports (max=%d).\n", @@ -4986,7 +5099,7 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) ret_err = -EINVAL; goto free_card; } - if (hc->type == 1) + if (hc->ctype == HFC_TYPE_E1) ret_err = init_e1_port(hc, m); else ret_err = init_multi_port(hc, pt); @@ -5070,6 +5183,7 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) hc->iclock = mISDN_register_clock("HFCMulti", 0, clockctl, hc); /* initialize hardware */ + hc->irq = (m->irq) ? : hc->pci_dev->irq; ret_err = init_card(hc); if (ret_err) { printk(KERN_ERR "init card returns %d\n", ret_err); @@ -5120,45 +5234,47 @@ static void __devexit hfc_remove_pci(struct pci_dev *pdev) #define VENDOR_PRIM "PrimuX" static const struct hm_map hfcm_map[] = { -/*0*/ {VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0}, -/*1*/ {VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S, 0}, -/*2*/ {VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0}, -/*3*/ {VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0}, -/*4*/ {VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0}, -/*5*/ {VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0}, -/*6*/ {VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, DIP_4S, 0}, -/*7*/ {VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0}, -/*8*/ {VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO}, -/*9*/ {VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0}, -/*10*/ {VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0}, -/*11*/ {VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0}, - -/*12*/ {VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0}, +/*0*/ {VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0, 0}, +/*1*/ {VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S, 0, 0}, +/*2*/ {VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0, 0}, +/*3*/ {VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0, 0}, +/*4*/ {VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0, 0}, +/*5*/ {VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0, 0}, +/*6*/ {VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, DIP_4S, 0, 0}, +/*7*/ {VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0, 0}, +/*8*/ {VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO, 0}, +/*9*/ {VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0, 0}, +/*10*/ {VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0, 0}, +/*11*/ {VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0, 0}, + +/*12*/ {VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0, 0}, /*13*/ {VENDOR_BN, "HFC-8S Card (+)", 8, 8, 1, 8, 0, DIP_8S, - HFC_IO_MODE_REGIO}, -/*14*/ {VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0}, -/*15*/ {VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0}, + HFC_IO_MODE_REGIO, 0}, +/*14*/ {VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0, 0}, +/*15*/ {VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0, 0}, -/*16*/ {VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0}, -/*17*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0}, -/*18*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0}, +/*16*/ {VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0, 0}, +/*17*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0}, +/*18*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0}, -/*19*/ {VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0}, -/*20*/ {VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0}, -/*21*/ {VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0}, -/*22*/ {VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0}, +/*19*/ {VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0, 0}, +/*20*/ {VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0, 0}, +/*21*/ {VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0}, +/*22*/ {VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0}, -/*23*/ {VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0}, -/*24*/ {VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0}, -/*25*/ {VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0}, +/*23*/ {VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0, 0}, +/*24*/ {VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0, 0}, +/*25*/ {VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0, 0}, /*26*/ {VENDOR_CCD, "HFC-4S Speech Design", 4, 4, 0, 0, 0, 0, - HFC_IO_MODE_PLXSD}, + HFC_IO_MODE_PLXSD, 0}, /*27*/ {VENDOR_CCD, "HFC-E1 Speech Design", 1, 1, 0, 0, 0, 0, - HFC_IO_MODE_PLXSD}, -/*28*/ {VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0}, -/*29*/ {VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0}, -/*30*/ {VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0}, + HFC_IO_MODE_PLXSD, 0}, +/*28*/ {VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0, 0}, +/*29*/ {VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0, 0}, +/*30*/ {VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0, 0}, +/*31*/ {VENDOR_CCD, "XHFC-4S Speech Design", 5, 4, 0, 0, 0, 0, + HFC_IO_MODE_EMBSD, XHFC_IRQ}, }; #undef H @@ -5265,7 +5381,7 @@ hfcmulti_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Please contact the driver maintainer for support.\n"); return -ENODEV; } - ret = hfcmulti_init(pdev, ent); + ret = hfcmulti_init(m, pdev, ent); if (ret) return ret; HFC_cnt++; @@ -5295,6 +5411,8 @@ static int __init HFCmulti_init(void) { int err; + int i, xhfc = 0; + struct hm_map m; printk(KERN_INFO "mISDN: HFC-multi driver %s\n", HFC_MULTI_VERSION); @@ -5342,11 +5460,43 @@ HFCmulti_init(void) if (!clock) clock = 1; + /* Register the embedded devices. + * This should be done before the PCI cards registration */ + switch (hwid) { + case HWID_MINIP4: + xhfc = 1; + m = hfcm_map[31]; + break; + case HWID_MINIP8: + xhfc = 2; + m = hfcm_map[31]; + break; + case HWID_MINIP16: + xhfc = 4; + m = hfcm_map[31]; + break; + default: + xhfc = 0; + } + + for (i = 0; i < xhfc; ++i) { + err = hfcmulti_init(&m, NULL, NULL); + if (err) { + printk(KERN_ERR "error registering embedded driver: " + "%x\n", err); + return -err; + } + HFC_cnt++; + printk(KERN_INFO "%d devices registered\n", HFC_cnt); + } + + /* Register the PCI cards */ err = pci_register_driver(&hfcmultipci_driver); if (err < 0) { printk(KERN_ERR "error registering pci driver: %x\n", err); return err; } + return 0; } diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c index d19b4f6d7d8..05866184ba2 100644 --- a/drivers/isdn/mISDN/dsp_cmx.c +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -947,6 +947,10 @@ conf_software: if (current_conf >= 0) { join_members: list_for_each_entry(member, &conf->mlist, list) { + /* if no conference engine on our chip, change to + * software */ + if (!member->dsp->features.hfc_conf) + goto conf_software; /* in case of hdlc, change to software */ if (member->dsp->hdlc) goto conf_software; diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h index 2c483d45f14..41d1eeb9b3b 100644 --- a/include/linux/mISDNdsp.h +++ b/include/linux/mISDNdsp.h @@ -25,6 +25,7 @@ extern void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem); struct dsp_features { int hfc_id; /* unique id to identify the chip (or -1) */ int hfc_dtmf; /* set if HFCmulti card supports dtmf */ + int hfc_conf; /* set if HFCmulti card supports conferences */ int hfc_loops; /* set if card supports tone loops */ int hfc_echocanhw; /* set if card supports echocancelation*/ int pcm_id; /* unique id to identify the pcm bus (or -1) */ -- cgit v1.2.3-70-g09d2 From 7245a2fe3c10ed7c2e9b1c8a83af5919c0cc0a89 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Fri, 22 May 2009 11:04:55 +0000 Subject: mISDN: Add PCI ID for Junghanns 8S card new id for HFC-8S Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 3 +++ include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 5be4edf631d..50e9f4d88f4 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -5275,6 +5275,7 @@ static const struct hm_map hfcm_map[] = { /*30*/ {VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0, 0}, /*31*/ {VENDOR_CCD, "XHFC-4S Speech Design", 5, 4, 0, 0, 0, 0, HFC_IO_MODE_EMBSD, XHFC_IRQ}, +/*32*/ {VENDOR_JH, "HFC-8S (junghanns)", 8, 8, 1, 0, 0, 0, 0, 0}, }; #undef H @@ -5328,6 +5329,8 @@ static struct pci_device_id hfmultipci_ids[] __devinitdata = { PCI_SUBDEVICE_ID_CCD_HFC8S, 0, 0, H(18)}, /* 8S */ { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, PCI_SUBDEVICE_ID_CCD_OV8S, 0, 0, H(30)}, /* OpenVox 8 */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_JH8S, 0, 0, H(32)}, /* Junganns 8S */ /* Cards with HFC-E1 Chip */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3038ac25491..4bdff984ac9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1914,6 +1914,7 @@ #define PCI_SUBDEVICE_ID_CCD_SWYX4S 0xB540 #define PCI_SUBDEVICE_ID_CCD_JH4S20 0xB550 #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1 0xB552 +#define PCI_SUBDEVICE_ID_CCD_JH8S 0xB55B #define PCI_SUBDEVICE_ID_CCD_BN4S 0xB560 #define PCI_SUBDEVICE_ID_CCD_BN8S 0xB562 #define PCI_SUBDEVICE_ID_CCD_BNE1 0xB563 -- cgit v1.2.3-70-g09d2 From eac74af9b547e29c9634ed5eff4d514349e73310 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Fri, 22 May 2009 11:04:56 +0000 Subject: mISDN: Cleanup debug messages This patch make debug printk's KERN_DEBUG and also fix some codestyle issues. Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 159 +++++++++++++++++---------------- drivers/isdn/hardware/mISDN/hfcpci.c | 28 +++--- drivers/isdn/mISDN/dsp_audio.c | 5 +- drivers/isdn/mISDN/dsp_cmx.c | 7 +- drivers/isdn/mISDN/dsp_core.c | 14 +-- drivers/isdn/mISDN/dsp_ecdis.h | 2 +- drivers/isdn/mISDN/dsp_tones.c | 23 +++-- drivers/isdn/mISDN/l1oip_core.c | 30 +++---- drivers/isdn/mISDN/socket.c | 2 +- drivers/isdn/mISDN/tei.c | 1 - drivers/isdn/mISDN/timerdev.c | 2 +- include/linux/mISDNif.h | 14 +-- 12 files changed, 148 insertions(+), 139 deletions(-) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 50e9f4d88f4..d60f5b7a41d 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -275,7 +275,7 @@ HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val, HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val) #endif { - writeb(val, (hc->pci_membase)+reg); + writeb(val, hc->pci_membase + reg); } static u_char #ifdef HFC_REGISTER_DEBUG @@ -284,7 +284,7 @@ HFC_inb_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line) HFC_inb_pcimem(struct hfc_multi *hc, u_char reg) #endif { - return readb((hc->pci_membase)+reg); + return readb(hc->pci_membase + reg); } static u_short #ifdef HFC_REGISTER_DEBUG @@ -293,7 +293,7 @@ HFC_inw_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line) HFC_inw_pcimem(struct hfc_multi *hc, u_char reg) #endif { - return readw((hc->pci_membase)+reg); + return readw(hc->pci_membase + reg); } static void #ifdef HFC_REGISTER_DEBUG @@ -302,7 +302,8 @@ HFC_wait_pcimem(struct hfc_multi *hc, const char *function, int line) HFC_wait_pcimem(struct hfc_multi *hc) #endif { - while (readb((hc->pci_membase)+R_STATUS) & V_BUSY); + while (readb(hc->pci_membase + R_STATUS) & V_BUSY) + cpu_relax(); } /* HFC_IO_MODE_REGIO */ @@ -314,7 +315,7 @@ HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val, HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val) #endif { - outb(reg, (hc->pci_iobase)+4); + outb(reg, hc->pci_iobase + 4); outb(val, hc->pci_iobase); } static u_char @@ -324,7 +325,7 @@ HFC_inb_regio(struct hfc_multi *hc, u_char reg, const char *function, int line) HFC_inb_regio(struct hfc_multi *hc, u_char reg) #endif { - outb(reg, (hc->pci_iobase)+4); + outb(reg, hc->pci_iobase + 4); return inb(hc->pci_iobase); } static u_short @@ -334,7 +335,7 @@ HFC_inw_regio(struct hfc_multi *hc, u_char reg, const char *function, int line) HFC_inw_regio(struct hfc_multi *hc, u_char reg) #endif { - outb(reg, (hc->pci_iobase)+4); + outb(reg, hc->pci_iobase + 4); return inw(hc->pci_iobase); } static void @@ -344,8 +345,9 @@ HFC_wait_regio(struct hfc_multi *hc, const char *function, int line) HFC_wait_regio(struct hfc_multi *hc) #endif { - outb(R_STATUS, (hc->pci_iobase)+4); - while (inb(hc->pci_iobase) & V_BUSY); + outb(R_STATUS, hc->pci_iobase + 4); + while (inb(hc->pci_iobase) & V_BUSY) + cpu_relax(); } #ifdef HFC_REGISTER_DEBUG @@ -364,14 +366,14 @@ HFC_outb_debug(struct hfc_multi *hc, u_char reg, u_char val, if (regname[0] == '\0') strcpy(regname, "register"); - bits[7] = '0'+(!!(val&1)); - bits[6] = '0'+(!!(val&2)); - bits[5] = '0'+(!!(val&4)); - bits[4] = '0'+(!!(val&8)); - bits[3] = '0'+(!!(val&16)); - bits[2] = '0'+(!!(val&32)); - bits[1] = '0'+(!!(val&64)); - bits[0] = '0'+(!!(val&128)); + bits[7] = '0' + (!!(val & 1)); + bits[6] = '0' + (!!(val & 2)); + bits[5] = '0' + (!!(val & 4)); + bits[4] = '0' + (!!(val & 8)); + bits[3] = '0' + (!!(val & 16)); + bits[2] = '0' + (!!(val & 32)); + bits[1] = '0' + (!!(val & 64)); + bits[0] = '0' + (!!(val & 128)); printk(KERN_DEBUG "HFC_outb(chip %d, %02x=%s, 0x%02x=%s); in %s() line %d\n", hc->id, reg, regname, val, bits, function, line); @@ -394,14 +396,14 @@ HFC_inb_debug(struct hfc_multi *hc, u_char reg, const char *function, int line) if (regname[0] == '\0') strcpy(regname, "register"); - bits[7] = '0'+(!!(val&1)); - bits[6] = '0'+(!!(val&2)); - bits[5] = '0'+(!!(val&4)); - bits[4] = '0'+(!!(val&8)); - bits[3] = '0'+(!!(val&16)); - bits[2] = '0'+(!!(val&32)); - bits[1] = '0'+(!!(val&64)); - bits[0] = '0'+(!!(val&128)); + bits[7] = '0' + (!!(val & 1)); + bits[6] = '0' + (!!(val & 2)); + bits[5] = '0' + (!!(val & 4)); + bits[4] = '0' + (!!(val & 8)); + bits[3] = '0' + (!!(val & 16)); + bits[2] = '0' + (!!(val & 32)); + bits[1] = '0' + (!!(val & 64)); + bits[0] = '0' + (!!(val & 128)); printk(KERN_DEBUG "HFC_inb(chip %d, %02x=%s) = 0x%02x=%s; in %s() line %d\n", hc->id, reg, regname, val, bits, function, line); @@ -481,6 +483,7 @@ write_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len) len--; } } + /* read fifo data (REGIO) */ static void read_fifo_regio(struct hfc_multi *hc, u_char *data, int len) @@ -526,7 +529,6 @@ read_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len) } } - static void enable_hwirq(struct hfc_multi *hc) { @@ -1011,7 +1013,7 @@ plxsd_checksync(struct hfc_multi *hc, int rm) if (hc->syncronized) { if (syncmaster == NULL) { if (debug & DEBUG_HFCMULTI_PLXSD) - printk(KERN_WARNING "%s: GOT sync on card %d" + printk(KERN_DEBUG "%s: GOT sync on card %d" " (id=%d)\n", __func__, hc->id + 1, hc->id); hfcmulti_resync(hc, hc, rm); @@ -1019,7 +1021,7 @@ plxsd_checksync(struct hfc_multi *hc, int rm) } else { if (syncmaster == hc) { if (debug & DEBUG_HFCMULTI_PLXSD) - printk(KERN_WARNING "%s: LOST sync on card %d" + printk(KERN_DEBUG "%s: LOST sync on card %d" " (id=%d)\n", __func__, hc->id + 1, hc->id); hfcmulti_resync(hc, NULL, rm); @@ -1068,7 +1070,7 @@ release_io_hfcmulti(struct hfc_multi *hc) pv &= ~PLX_DSP_RES_N; writel(pv, plx_acc_32); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: PCM off: PLX_GPIO=%x\n", + printk(KERN_DEBUG "%s: PCM off: PLX_GPIO=%x\n", __func__, pv); spin_unlock_irqrestore(&plx_lock, plx_flags); } @@ -1119,8 +1121,8 @@ init_chip(struct hfc_multi *hc) if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: entered\n", __func__); val = HFC_inb(hc, R_CHIP_ID); - if ((val>>4) != 0x8 && (val>>4) != 0xc && (val>>4) != 0xe - && (val>>1) != 0x31) { + if ((val >> 4) != 0x8 && (val >> 4) != 0xc && (val >> 4) != 0xe && + (val >> 1) != 0x31) { printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val); err = -EIO; goto out; @@ -1202,7 +1204,7 @@ init_chip(struct hfc_multi *hc) writel(pv, plx_acc_32); spin_unlock_irqrestore(&plx_lock, plx_flags); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: slave/term: PLX_GPIO=%x\n", + printk(KERN_DEBUG "%s: slave/term: PLX_GPIO=%x\n", __func__, pv); /* * If we are the 3rd PLXSD card or higher, we must turn @@ -1230,8 +1232,9 @@ init_chip(struct hfc_multi *hc) writel(pv, plx_acc_32); spin_unlock_irqrestore(&plx_lock, plx_flags); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: term off: PLX_GPIO=%x\n", - __func__, pv); + printk(KERN_DEBUG + "%s: term off: PLX_GPIO=%x\n", + __func__, pv); } spin_unlock_irqrestore(&HFClock, hfc_flags); hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */ @@ -1292,13 +1295,13 @@ init_chip(struct hfc_multi *hc) pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N; pv |= PLX_SYNC_O_EN; if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: master: PLX_GPIO=%x\n", + printk(KERN_DEBUG "%s: master: PLX_GPIO=%x\n", __func__, pv); } else { pv &= ~(PLX_MASTER_EN | PLX_SLAVE_EN_N); pv &= ~PLX_SYNC_O_EN; if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: slave: PLX_GPIO=%x\n", + printk(KERN_DEBUG "%s: slave: PLX_GPIO=%x\n", __func__, pv); } writel(pv, plx_acc_32); @@ -1410,8 +1413,8 @@ controller_fail: writel(pv, plx_acc_32); spin_unlock_irqrestore(&plx_lock, plx_flags); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: master: PLX_GPIO" - "=%x\n", __func__, pv); + printk(KERN_DEBUG "%s: master: " + "PLX_GPIO=%x\n", __func__, pv); } hc->hw.r_pcm_md0 |= V_PCM_MD; HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00); @@ -1445,7 +1448,7 @@ controller_fail: writel(pv, plx_acc_32); spin_unlock_irqrestore(&plx_lock, plx_flags); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: reset off: PLX_GPIO=%x\n", + printk(KERN_DEBUG "%s: reset off: PLX_GPIO=%x\n", __func__, pv); } @@ -1878,7 +1881,7 @@ hfcmulti_dtmf(struct hfc_multi *hc) hc->chan[ch].coeff_count = 0; skb = mI_alloc_skb(512, GFP_ATOMIC); if (!skb) { - printk(KERN_WARNING "%s: No memory for skb\n", + printk(KERN_DEBUG "%s: No memory for skb\n", __func__); continue; } @@ -2104,7 +2107,7 @@ next_frame: printk(KERN_DEBUG "%s(card %d): fifo(%d) has %d bytes space " "left (z1=%04x, z2=%04x) sending %d of %d bytes %s\n", __func__, hc->id + 1, ch, Zspace, z1, z2, ii-i, len-i, - temp ? "HDLC":"TRANS"); + temp ? "HDLC" : "TRANS"); /* Have to prep the audio data */ hc->write_fifo(hc, d, ii - i); @@ -2780,13 +2783,13 @@ hfcmulti_interrupt(int intno, void *dev_id) handle_timer_irq(hc); } - if (r_irq_misc & V_DTMF_IRQ) { + if (r_irq_misc & V_DTMF_IRQ) hfcmulti_dtmf(hc); - } + if (r_irq_misc & V_IRQ_PROC) { static int irq_proc_cnt; if (!irq_proc_cnt++) - printk(KERN_WARNING "%s: got V_IRQ_PROC -" + printk(KERN_DEBUG "%s: got V_IRQ_PROC -" " this should not happen\n", __func__); } @@ -2936,7 +2939,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, else flow_rx = 0xc0; /* ST->(FIFO,PCM) */ /* put on slot */ - routing = bank_rx?0x80:0xc0; /* reversed */ + routing = bank_rx ? 0x80 : 0xc0; /* reversed */ if (conf >= 0 || bank_rx > 1) routing = 0x40; /* loop */ if (debug & DEBUG_HFCMULTI_MODE) @@ -2971,7 +2974,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, HFC_wait(hc); if (hc->chan[ch].bch && hc->ctype != HFC_TYPE_E1) { hc->hw.a_st_ctrl0[hc->chan[ch].port] &= - ((ch & 0x3) == 0)? ~V_B1_EN: ~V_B2_EN; + ((ch & 0x3) == 0) ? ~V_B1_EN : ~V_B2_EN; HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); /* undocumented: delay after R_ST_SEL */ udelay(1); @@ -3505,9 +3508,9 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb) switch (hh->id) { case HFC_SPL_LOOP_ON: /* set sample loop */ if (debug & DEBUG_HFCMULTI_MSG) - printk(KERN_DEBUG - "%s: HFC_SPL_LOOP_ON (len = %d)\n", - __func__, skb->len); + printk(KERN_DEBUG + "%s: HFC_SPL_LOOP_ON (len = %d)\n", + __func__, skb->len); ret = 0; break; case HFC_SPL_LOOP_OFF: /* set silence */ @@ -3716,8 +3719,7 @@ ph_state_change(struct dchannel *dch) int ch, i; if (!dch) { - printk(KERN_WARNING "%s: ERROR given dch is NULL\n", - __func__); + printk(KERN_WARNING "%s: ERROR given dch is NULL\n", __func__); return; } hc = dch->hw; @@ -3738,14 +3740,15 @@ ph_state_change(struct dchannel *dch) switch (dch->state) { case (1): if (hc->e1_state != 1) { - for (i = 1; i <= 31; i++) { - /* reset fifos on e1 activation */ - HFC_outb_nodebug(hc, R_FIFO, (i << 1) | 1); - HFC_wait_nodebug(hc); - HFC_outb_nodebug(hc, - R_INC_RES_FIFO, V_RES_F); - HFC_wait_nodebug(hc); - } + for (i = 1; i <= 31; i++) { + /* reset fifos on e1 activation */ + HFC_outb_nodebug(hc, R_FIFO, + (i << 1) | 1); + HFC_wait_nodebug(hc); + HFC_outb_nodebug(hc, R_INC_RES_FIFO, + V_RES_F); + HFC_wait_nodebug(hc); + } } test_and_set_bit(FLG_ACTIVE, &dch->Flags); _queue_data(&dch->dev.D, PH_ACTIVATE_IND, @@ -4045,12 +4048,12 @@ open_dchannel(struct hfc_multi *hc, struct dchannel *dch, return -EINVAL; if ((dch->dev.D.protocol != ISDN_P_NONE) && (dch->dev.D.protocol != rq->protocol)) { - if (debug & DEBUG_HFCMULTI_MODE) - printk(KERN_WARNING "%s: change protocol %x to %x\n", - __func__, dch->dev.D.protocol, rq->protocol); + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG "%s: change protocol %x to %x\n", + __func__, dch->dev.D.protocol, rq->protocol); } - if ((dch->dev.D.protocol == ISDN_P_TE_S0) - && (rq->protocol != ISDN_P_TE_S0)) + if ((dch->dev.D.protocol == ISDN_P_TE_S0) && + (rq->protocol != ISDN_P_TE_S0)) l1_event(dch->l1, CLOSE_CHANNEL); if (dch->dev.D.protocol != rq->protocol) { if (rq->protocol == ISDN_P_TE_S0) { @@ -4127,9 +4130,9 @@ channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq) wd_cnt = cq->p1 & 0xf; wd_mode = !!(cq->p1 >> 4); if (debug & DEBUG_HFCMULTI_MSG) - printk(KERN_DEBUG - "%s: MISDN_CTRL_HFC_WD_INIT mode %s counter 0x%x\n", - __func__, wd_mode ? "AUTO" : "MANUAL", wd_cnt); + printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_INIT mode %s" + ", counter 0x%x\n", __func__, + wd_mode ? "AUTO" : "MANUAL", wd_cnt); /* set the watchdog timer */ HFC_outb(hc, R_TI_WD, poll_timer | (wd_cnt << 4)); hc->hw.r_bert_wd_md = (wd_mode ? V_AUTO_WD_RES : 0); @@ -4139,8 +4142,8 @@ channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq) HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES); if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { /* enable the watchdog output for Speech-Design */ - HFC_outb(hc, R_GPIO_SEL, V_GPIO_SEL7); - HFC_outb(hc, R_GPIO_EN1, V_GPIO_EN15); + HFC_outb(hc, R_GPIO_SEL, V_GPIO_SEL7); + HFC_outb(hc, R_GPIO_EN1, V_GPIO_EN15); HFC_outb(hc, R_GPIO_OUT1, 0); HFC_outb(hc, R_GPIO_OUT1, V_GPIO_OUT15); } @@ -4319,7 +4322,7 @@ error: } if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: free irq %d\n", __func__, hc->irq); + printk(KERN_DEBUG "%s: free irq %d\n", __func__, hc->irq); if (hc->irq) { free_irq(hc->irq, hc); hc->irq = 0; @@ -4624,7 +4627,7 @@ release_card(struct hfc_multi *hc) int ch; if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: release card (%d) entered\n", + printk(KERN_DEBUG "%s: release card (%d) entered\n", __func__, hc->id); /* unregister clock source */ @@ -4653,7 +4656,7 @@ release_card(struct hfc_multi *hc) /* release hardware & irq */ if (hc->irq) { if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: free irq %d\n", + printk(KERN_DEBUG "%s: free irq %d\n", __func__, hc->irq); free_irq(hc->irq, hc); hc->irq = 0; @@ -4662,17 +4665,17 @@ release_card(struct hfc_multi *hc) release_io_hfcmulti(hc); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: remove instance from list\n", + printk(KERN_DEBUG "%s: remove instance from list\n", __func__); list_del(&hc->list); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: delete instance\n", __func__); + printk(KERN_DEBUG "%s: delete instance\n", __func__); if (hc == syncmaster) syncmaster = NULL; kfree(hc); if (debug & DEBUG_HFCMULTI_INIT) - printk(KERN_WARNING "%s: card successfully removed\n", + printk(KERN_DEBUG "%s: card successfully removed\n", __func__); } @@ -4695,7 +4698,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m) (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)); dch->dev.D.send = handle_dmsg; dch->dev.D.ctrl = hfcm_dctrl; - dch->dev.nrbchan = (hc->dslot)?30:31; + dch->dev.nrbchan = (hc->dslot) ? 30 : 31; dch->slot = hc->dslot; hc->chan[hc->dslot].dch = dch; hc->chan[hc->dslot].port = 0; @@ -4937,7 +4940,7 @@ init_multi_port(struct hfc_multi *hc, int pt) } /* disable E-channel */ if (port[Port_cnt] & 0x004) { - if (debug & DEBUG_HFCMULTI_INIT) + if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: PROTOCOL disable E-channel: " "card(%d) port(%d)\n", @@ -5222,7 +5225,7 @@ static void __devexit hfc_remove_pci(struct pci_dev *pdev) spin_unlock_irqrestore(&HFClock, flags); } else { if (debug) - printk(KERN_WARNING "%s: drvdata allready removed\n", + printk(KERN_DEBUG "%s: drvdata allready removed\n", __func__); } } diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 60dc92562c6..776afc8c927 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -257,7 +257,7 @@ reset_hfcpci(struct hfc_pci *hc) Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1); /* Clear already pending ints */ - if (Read_hfc(hc, HFCPCI_INT_S1)); + val = Read_hfc(hc, HFCPCI_INT_S1); /* set NT/TE mode */ hfcpci_setmode(hc); @@ -499,7 +499,8 @@ receive_dmsg(struct hfc_pci *hc) df->f2 = ((df->f2 + 1) & MAX_D_FRAMES) | (MAX_D_FRAMES + 1); /* next buffer */ df->za[df->f2 & D_FREG_MASK].z2 = - cpu_to_le16((le16_to_cpu(zp->z2) + rcnt) & (D_FIFO_SIZE - 1)); + cpu_to_le16((le16_to_cpu(zp->z2) + rcnt) & + (D_FIFO_SIZE - 1)); } else { dch->rx_skb = mI_alloc_skb(rcnt - 3, GFP_ATOMIC); if (!dch->rx_skb) { @@ -966,6 +967,7 @@ static void ph_state_nt(struct dchannel *dch) { struct hfc_pci *hc = dch->hw; + u_char val; if (dch->debug) printk(KERN_DEBUG "%s: NT newstate %x\n", @@ -979,7 +981,7 @@ ph_state_nt(struct dchannel *dch) hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER; Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1); /* Clear already pending ints */ - if (Read_hfc(hc, HFCPCI_INT_S1)); + val = Read_hfc(hc, HFCPCI_INT_S1); Write_hfc(hc, HFCPCI_STATES, 4 | HFCPCI_LOAD_STATE); udelay(10); Write_hfc(hc, HFCPCI_STATES, 4); @@ -1268,8 +1270,7 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol) rx_slot = (bc>>8) & 0xff; tx_slot = (bc>>16) & 0xff; bc = bc & 0xff; - } else if (test_bit(HFC_CFG_PCM, &hc->cfg) && - (protocol > ISDN_P_NONE)) + } else if (test_bit(HFC_CFG_PCM, &hc->cfg) && (protocol > ISDN_P_NONE)) printk(KERN_WARNING "%s: no pcm channel id but HFC_CFG_PCM\n", __func__); if (hc->chanlimit > 1) { @@ -1327,8 +1328,8 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol) case (ISDN_P_B_RAW): bch->state = protocol; bch->nr = bc; - hfcpci_clear_fifo_rx(hc, (fifo2 & 2)?1:0); - hfcpci_clear_fifo_tx(hc, (fifo2 & 2)?1:0); + hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0); + hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0); if (bc & 2) { hc->hw.sctrl |= SCTRL_B2_ENA; hc->hw.sctrl_r |= SCTRL_B2_ENA; @@ -1362,8 +1363,8 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol) case (ISDN_P_B_HDLC): bch->state = protocol; bch->nr = bc; - hfcpci_clear_fifo_rx(hc, (fifo2 & 2)?1:0); - hfcpci_clear_fifo_tx(hc, (fifo2 & 2)?1:0); + hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0); + hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0); if (bc & 2) { hc->hw.sctrl |= SCTRL_B2_ENA; hc->hw.sctrl_r |= SCTRL_B2_ENA; @@ -1457,7 +1458,7 @@ set_hfcpci_rxtest(struct bchannel *bch, int protocol, int chan) switch (protocol) { case (ISDN_P_B_RAW): bch->state = protocol; - hfcpci_clear_fifo_rx(hc, (chan & 2)?1:0); + hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0); if (chan & 2) { hc->hw.sctrl_r |= SCTRL_B2_ENA; hc->hw.fifo_en |= HFCPCI_FIFOEN_B2RX; @@ -1482,7 +1483,7 @@ set_hfcpci_rxtest(struct bchannel *bch, int protocol, int chan) break; case (ISDN_P_B_HDLC): bch->state = protocol; - hfcpci_clear_fifo_rx(hc, (chan & 2)?1:0); + hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0); if (chan & 2) { hc->hw.sctrl_r |= SCTRL_B2_ENA; hc->hw.last_bfifo_cnt[1] = 0; @@ -2047,7 +2048,8 @@ setup_hw(struct hfc_pci *hc) printk(KERN_WARNING "HFC-PCI: No IRQ for PCI card found\n"); return 1; } - hc->hw.pci_io = (char __iomem *)(unsigned long)hc->pdev->resource[1].start; + hc->hw.pci_io = + (char __iomem *)(unsigned long)hc->pdev->resource[1].start; if (!hc->hw.pci_io) { printk(KERN_WARNING "HFC-PCI: No IO-Mem for PCI card found\n"); @@ -2289,7 +2291,7 @@ hfc_remove_pci(struct pci_dev *pdev) release_card(card); else if (debug) - printk(KERN_WARNING "%s: drvdata already removed\n", + printk(KERN_DEBUG "%s: drvdata already removed\n", __func__); } diff --git a/drivers/isdn/mISDN/dsp_audio.c b/drivers/isdn/mISDN/dsp_audio.c index de3795e3f43..9c7c6451bf3 100644 --- a/drivers/isdn/mISDN/dsp_audio.c +++ b/drivers/isdn/mISDN/dsp_audio.c @@ -210,9 +210,8 @@ dsp_audio_generate_seven(void) j = 0; for (k = 0; k < 256; k++) { if (dsp_audio_alaw_to_s32[k] - < dsp_audio_alaw_to_s32[i]) { - j++; - } + < dsp_audio_alaw_to_s32[i]) + j++; } sorted_alaw[j] = i; } diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c index 05866184ba2..9c7c0d1ba55 100644 --- a/drivers/isdn/mISDN/dsp_cmx.c +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -238,7 +238,7 @@ dsp_cmx_add_conf_member(struct dsp *dsp, struct dsp_conf *conf) member = kzalloc(sizeof(struct dsp_conf_member), GFP_ATOMIC); if (!member) { - printk(KERN_ERR "kmalloc struct dsp_conf_member failed\n"); + printk(KERN_ERR "kzalloc struct dsp_conf_member failed\n"); return -ENOMEM; } member->dsp = dsp; @@ -317,7 +317,7 @@ static struct dsp_conf conf = kzalloc(sizeof(struct dsp_conf), GFP_ATOMIC); if (!conf) { - printk(KERN_ERR "kmalloc struct dsp_conf failed\n"); + printk(KERN_ERR "kzalloc struct dsp_conf failed\n"); return NULL; } INIT_LIST_HEAD(&conf->mlist); @@ -1389,7 +1389,8 @@ dsp_cmx_send_member(struct dsp *dsp, int len, s32 *c, int members) while (r != rr && t != tt) { #ifdef CMX_TX_DEBUG if (strlen(debugbuf) < 48) - sprintf(debugbuf+strlen(debugbuf), " %02x", p[t]); + sprintf(debugbuf+strlen(debugbuf), " %02x", + p[t]); #endif *d++ = p[t]; /* write tx_buff */ t = (t+1) & CMX_BUFF_MASK; diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index a0e0af81eb2..c35750647c6 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -280,7 +280,7 @@ dsp_fill_empty(struct dsp *dsp) static int dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) { - struct sk_buff *nskb; + struct sk_buff *nskb; int ret = 0; int cont; u8 *data; @@ -558,7 +558,7 @@ tone_off: dsp->pipeline.inuse = 1; dsp_cmx_hardware(dsp->conf, dsp); ret = dsp_pipeline_build(&dsp->pipeline, - len > 0 ? (char *)data : NULL); + len > 0 ? data : NULL); dsp_cmx_hardware(dsp->conf, dsp); dsp_rx_off(dsp); } @@ -720,7 +720,7 @@ dsp_function(struct mISDNchannel *ch, struct sk_buff *skb) /* check if dtmf soft decoding is turned on */ if (dsp->dtmf.software) { digits = dsp_dtmf_goertzel_decode(dsp, skb->data, - skb->len, (dsp_options&DSP_OPT_ULAW)?1:0); + skb->len, (dsp_options&DSP_OPT_ULAW) ? 1 : 0); } /* we need to process receive data if software */ if (dsp->conf && dsp->conf->software) { @@ -952,7 +952,7 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) int err = 0; if (debug & DEBUG_DSP_CTRL) - printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd); + printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd); switch (cmd) { case OPEN_CHANNEL: @@ -1175,9 +1175,9 @@ static int dsp_init(void) /* init conversion tables */ dsp_audio_generate_law_tables(); - dsp_silence = (dsp_options&DSP_OPT_ULAW)?0xff:0x2a; - dsp_audio_law_to_s32 = (dsp_options&DSP_OPT_ULAW)?dsp_audio_ulaw_to_s32: - dsp_audio_alaw_to_s32; + dsp_silence = (dsp_options&DSP_OPT_ULAW) ? 0xff : 0x2a; + dsp_audio_law_to_s32 = (dsp_options&DSP_OPT_ULAW) ? + dsp_audio_ulaw_to_s32 : dsp_audio_alaw_to_s32; dsp_audio_generate_s2law_table(); dsp_audio_generate_seven(); dsp_audio_generate_mix_table(); diff --git a/drivers/isdn/mISDN/dsp_ecdis.h b/drivers/isdn/mISDN/dsp_ecdis.h index 8a20af43308..21dbd153ee2 100644 --- a/drivers/isdn/mISDN/dsp_ecdis.h +++ b/drivers/isdn/mISDN/dsp_ecdis.h @@ -91,7 +91,7 @@ int16_t amp) && det->tone_cycle_duration <= 475*8) { det->good_cycles++; if (det->good_cycles > 2) - det->hit = TRUE; + det->hit = TRUE; } det->tone_cycle_duration = 0; } diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c index 7a9af66f4b1..1debf53670d 100644 --- a/drivers/isdn/mISDN/dsp_tones.c +++ b/drivers/isdn/mISDN/dsp_tones.c @@ -253,18 +253,24 @@ static struct pattern { {8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {TONE_GERMAN_DIALPBX, - {DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL, NULL}, - {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL}, + {DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL, + NULL}, + {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL, + NULL}, {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, {TONE_GERMAN_OLDDIALPBX, - {DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL, NULL}, - {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL}, + {DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL, + NULL}, + {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL, + NULL}, {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, {TONE_AMERICAN_DIALPBX, - {DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, NULL, NULL, NULL, NULL}, - {SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, NULL, NULL, NULL, NULL}, + {DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, NULL, NULL, NULL, + NULL}, + {SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, NULL, NULL, NULL, + NULL}, {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, {TONE_GERMAN_RINGING, @@ -434,7 +440,7 @@ dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len) /* unlocking is not required, because we don't expect a response */ nskb = _alloc_mISDN_skb(PH_CONTROL_REQ, - (len)?HFC_SPL_LOOP_ON:HFC_SPL_LOOP_OFF, len, sample, + (len) ? HFC_SPL_LOOP_ON : HFC_SPL_LOOP_OFF, len, sample, GFP_ATOMIC); if (nskb) { if (dsp->ch.peer) { @@ -498,8 +504,7 @@ dsp_tone(struct dsp *dsp, int tone) /* we turn off the tone */ if (!tone) { - if (dsp->features.hfc_loops) - if (timer_pending(&tonet->tl)) + if (dsp->features.hfc_loops && timer_pending(&tonet->tl)) del_timer(&tonet->tl); if (dsp->features.hfc_loops) dsp_tone_hw_message(dsp, NULL, 0); diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index ea3c3aa2004..d9cf83b17e3 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -308,8 +308,8 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, /* assemble frame */ *p++ = (L1OIP_VERSION<<6) /* version and coding */ - | (hc->pri?0x20:0x00) /* type */ - | (hc->id?0x10:0x00) /* id */ + | (hc->pri ? 0x20 : 0x00) /* type */ + | (hc->id ? 0x10 : 0x00) /* id */ | localcodec; if (hc->id) { *p++ = hc->id>>24; /* id */ @@ -317,7 +317,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, *p++ = hc->id>>8; *p++ = hc->id; } - *p++ = (multi == 1)?0x80:0x00 + channel; /* m-flag, channel */ + *p++ = (multi == 1) ? 0x80 : 0x00 + channel; /* m-flag, channel */ if (multi == 1) *p++ = len; /* length */ *p++ = timebase>>8; /* time base */ @@ -401,12 +401,12 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase, } /* prepare message */ - nskb = mI_alloc_skb((remotecodec == 3)?(len<<1):len, GFP_ATOMIC); + nskb = mI_alloc_skb((remotecodec == 3) ? (len<<1) : len, GFP_ATOMIC); if (!nskb) { printk(KERN_ERR "%s: No mem for skb.\n", __func__); return; } - p = skb_put(nskb, (remotecodec == 3)?(len<<1):len); + p = skb_put(nskb, (remotecodec == 3) ? (len<<1) : len); if (remotecodec == 1 && ulaw) l1oip_alaw_to_ulaw(buf, len, p); @@ -458,7 +458,7 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase, hc->chan[channel].disorder_flag ^= 1; if (nskb) #endif - queue_ch_frame(&bch->ch, PH_DATA_IND, rx_counter, nskb); + queue_ch_frame(&bch->ch, PH_DATA_IND, rx_counter, nskb); } } @@ -749,8 +749,8 @@ l1oip_socket_thread(void *data) l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen); } else { if (debug & DEBUG_L1OIP_SOCKET) - printk(KERN_WARNING "%s: broken pipe on socket\n", - __func__); + printk(KERN_WARNING + "%s: broken pipe on socket\n", __func__); } } @@ -925,7 +925,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) p = skb->data; l = skb->len; while (l) { - ll = (l < L1OIP_MAX_PERFRAME)?l:L1OIP_MAX_PERFRAME; + ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME; l1oip_socket_send(hc, 0, dch->slot, 0, hc->chan[dch->slot].tx_counter++, p, ll); p += ll; @@ -1173,7 +1173,7 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb) p = skb->data; l = skb->len; while (l) { - ll = (l < L1OIP_MAX_PERFRAME)?l:L1OIP_MAX_PERFRAME; + ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME; l1oip_socket_send(hc, hc->codec, bch->slot, 0, hc->chan[bch->slot].tx_counter, p, ll); hc->chan[bch->slot].tx_counter += ll; @@ -1331,8 +1331,8 @@ init_card(struct l1oip *hc, int pri, int bundle) spin_lock_init(&hc->socket_lock); hc->idx = l1oip_cnt; hc->pri = pri; - hc->d_idx = pri?16:3; - hc->b_num = pri?30:2; + hc->d_idx = pri ? 16 : 3; + hc->b_num = pri ? 30 : 2; hc->bundle = bundle; if (hc->pri) sprintf(hc->name, "l1oip-e1.%d", l1oip_cnt + 1); @@ -1517,9 +1517,9 @@ l1oip_init(void) if (debug & DEBUG_L1OIP_INIT) printk(KERN_DEBUG "%s: interface %d is %s with %s.\n", - __func__, l1oip_cnt, pri?"PRI":"BRI", - bundle?"bundled IP packet for all B-channels" - :"seperate IP packets for every B-channel"); + __func__, l1oip_cnt, pri ? "PRI" : "BRI", + bundle ? "bundled IP packet for all B-channels" : + "seperate IP packets for every B-channel"); hc = kzalloc(sizeof(struct l1oip), GFP_ATOMIC); if (!hc) { diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 2a2c30a9438..c36f5213745 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -222,7 +222,7 @@ mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } else { /* use default for L2 messages */ if ((sk->sk_protocol == ISDN_P_LAPD_TE) || (sk->sk_protocol == ISDN_P_LAPD_NT)) - mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr; + mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr; } if (*debug & DEBUG_SOCKET) diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 778b660f067..bfcdd97df95 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -872,7 +872,6 @@ ph_data_ind(struct manager *mgr, struct sk_buff *skb) __func__, skb->len); goto done; } - if (*debug & DEBUG_L2_TEI) if ((skb->data[0] >> 2) != TEI_SAPI) /* not for us */ goto done; diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index bbd99d3282c..5b7e9bf514f 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -259,7 +259,7 @@ mISDN_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, return ret; } -static struct file_operations mISDN_fops = { +static const struct file_operations mISDN_fops = { .read = mISDN_read, .poll = mISDN_poll, .ioctl = mISDN_ioctl, diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 0b28fd1e393..45100b39a7c 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -292,19 +292,19 @@ struct mISDN_devrename { /* MPH_INFORMATION_REQ payload */ struct ph_info_ch { - __u32 protocol; - __u64 Flags; + __u32 protocol; + __u64 Flags; }; struct ph_info_dch { - struct ph_info_ch ch; - __u16 state; - __u16 num_bch; + struct ph_info_ch ch; + __u16 state; + __u16 num_bch; }; struct ph_info { - struct ph_info_dch dch; - struct ph_info_ch bch[]; + struct ph_info_dch dch; + struct ph_info_ch bch[]; }; /* timer device ioctl */ -- cgit v1.2.3-70-g09d2 From daebafed7fef54fcc73d2d01431122cfd578d1e0 Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Mon, 25 May 2009 00:56:56 -0700 Subject: mISDN: Added PCI ID for new Junghanns.net Single E1 cards. The new ID is validated by Cologne Chip. LEDs control is also supported. Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 4 ++++ include/linux/pci_ids.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index d60f5b7a41d..e1dab30aed3 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -5357,6 +5357,10 @@ static struct pci_device_id hfmultipci_ids[] __devinitdata = { PCI_SUBDEVICE_ID_CCD_SPD4S, 0, 0, H(26)}, /* PLX PCI Bridge */ { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD, PCI_SUBDEVICE_ID_CCD_SPDE1, 0, 0, H(27)}, /* PLX PCI Bridge */ + + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_JHSE1, 0, 0, H(25)}, /* Junghanns E1 */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_ANY_ID, PCI_ANY_ID, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4bdff984ac9..12db06cf0e2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1914,6 +1914,7 @@ #define PCI_SUBDEVICE_ID_CCD_SWYX4S 0xB540 #define PCI_SUBDEVICE_ID_CCD_JH4S20 0xB550 #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1 0xB552 +#define PCI_SUBDEVICE_ID_CCD_JHSE1 0xB553 #define PCI_SUBDEVICE_ID_CCD_JH8S 0xB55B #define PCI_SUBDEVICE_ID_CCD_BN4S 0xB560 #define PCI_SUBDEVICE_ID_CCD_BN8S 0xB562 -- cgit v1.2.3-70-g09d2 From e3804cbebb67887879102925961d41b503f7fbe3 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Mon, 25 May 2009 01:53:53 -0700 Subject: net: remove COMPAT_NET_DEV_OPS All drivers are already converted to new net_device_ops API and nobody uses old API anymore. Signed-off-by: Alexander Beregalov Signed-off-by: David S. Miller --- drivers/net/Kconfig | 9 --------- include/linux/netdevice.h | 38 ----------------------------------- net/802/fddi.c | 4 ---- net/802/hippi.c | 5 ----- net/8021q/vlan_dev.c | 1 - net/appletalk/dev.c | 11 ----------- net/core/dev.c | 50 ----------------------------------------------- net/ethernet/eth.c | 5 ----- 8 files changed, 123 deletions(-) (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0fb446e047e..efa659f0fb5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -25,15 +25,6 @@ menuconfig NETDEVICES # that for each of the symbols. if NETDEVICES -config COMPAT_NET_DEV_OPS - default y - bool "Enable older network device API compatibility" - ---help--- - This option enables kernel compatibility with older network devices - that do not use net_device_ops interface. - - If unsure, say Y. - config IFB tristate "Intermediate Functional Block support" depends on NET_CLS_ACT diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8574e76b74..ae3c2099a04 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,44 +901,6 @@ struct net_device /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif - -#ifdef CONFIG_COMPAT_NET_DEV_OPS - struct { - int (*init)(struct net_device *dev); - void (*uninit)(struct net_device *dev); - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - void (*change_rx_flags)(struct net_device *dev, - int flags); - void (*set_rx_mode)(struct net_device *dev); - void (*set_multicast_list)(struct net_device *dev); - int (*set_mac_address)(struct net_device *dev, - void *addr); - int (*validate_addr)(struct net_device *dev); - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); - int (*set_config)(struct net_device *dev, - struct ifmap *map); - int (*change_mtu)(struct net_device *dev, int new_mtu); - int (*neigh_setup)(struct net_device *dev, - struct neigh_parms *); - void (*tx_timeout) (struct net_device *dev); - struct net_device_stats* (*get_stats)(struct net_device *dev); - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - }; -#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/802/fddi.c b/net/802/fddi.c index 539e6064e6d..3ef0ab0a543 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -185,10 +185,6 @@ static const struct header_ops fddi_header_ops = { static void fddi_setup(struct net_device *dev) { dev->header_ops = &fddi_header_ops; -#ifdef CONFIG_COMPAT_NET_DEV_OPS - dev->change_mtu = fddi_change_mtu, -#endif - dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ diff --git a/net/802/hippi.c b/net/802/hippi.c index 313b9ebf92e..cd3e8e92952 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -193,11 +193,6 @@ static const struct header_ops hippi_header_ops = { static void hippi_setup(struct net_device *dev) { -#ifdef CONFIG_COMPAT_NET_DEV_OPS - dev->change_mtu = hippi_change_mtu; - dev->set_mac_address = hippi_mac_addr; - dev->neigh_setup = hippi_neigh_setup_dev; -#endif dev->header_ops = &hippi_header_ops; /* diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ff7572ac548..1e2ad4c7c59 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -644,7 +644,6 @@ static int vlan_dev_init(struct net_device *dev) dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; dev->netdev_ops = &vlan_netdev_ops; } - netdev_resync_ops(dev); if (is_vlan_dev(real_dev)) subclass = 1; diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 72277d70c98..6c8016f6186 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -9,21 +9,10 @@ #include #include -#ifdef CONFIG_COMPAT_NET_DEV_OPS -static int ltalk_change_mtu(struct net_device *dev, int mtu) -{ - return -EINVAL; -} -#endif - static void ltalk_setup(struct net_device *dev) { /* Fill in the fields of the device structure with localtalk-generic values. */ -#ifdef CONFIG_COMPAT_NET_DEV_OPS - dev->change_mtu = ltalk_change_mtu; -#endif - dev->type = ARPHRD_LOCALTLK; dev->hard_header_len = LTALK_HLEN; dev->mtu = LTALK_MTU; diff --git a/net/core/dev.c b/net/core/dev.c index 3942266d1f6..241613f6dd2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4580,39 +4580,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); -/* Some devices need to (re-)set their netdev_ops inside - * ->init() or similar. If that happens, we have to setup - * the compat pointers again. - */ -void netdev_resync_ops(struct net_device *dev) -{ -#ifdef CONFIG_COMPAT_NET_DEV_OPS - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->neigh_setup = ops->ndo_neigh_setup; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif -#endif -} -EXPORT_SYMBOL(netdev_resync_ops); - /** * register_netdevice - register a network device * @dev: device to register @@ -4652,23 +4619,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; -#ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatibility support. - * This is temporary until all network devices are converted. - */ - if (dev->netdev_ops) { - netdev_resync_ops(dev); - } else { - char drivername[64]; - pr_info("%s (%s): not using net_device_ops yet\n", - dev->name, netdev_drivername(dev, drivername, 64)); - - /* This works only because net_device_ops and the - compatibility structure are the same. */ - dev->netdev_ops = (void *) &(dev->init); - } -#endif - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 280352aba40..5a883affecd 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -337,11 +337,6 @@ const struct header_ops eth_header_ops ____cacheline_aligned = { void ether_setup(struct net_device *dev) { dev->header_ops = ð_header_ops; -#ifdef CONFIG_COMPAT_NET_DEV_OPS - dev->change_mtu = eth_change_mtu; - dev->set_mac_address = eth_mac_addr; - dev->validate_addr = eth_validate_addr; -#endif dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; -- cgit v1.2.3-70-g09d2 From e527ea312f31e88a7fa5472b71db71c565b0d44f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 14:45:25 +0200 Subject: perf_counter: Remove unused ABI bits extra_config_len isn't used for anything, remove it. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525124600.116035832@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2ddf5e3c551..b1f2bac09f9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -154,11 +154,11 @@ struct perf_counter_hw_event { __reserved_1 : 51; - __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ + __u32 __reserved_2; - __u64 __reserved_2; __u64 __reserved_3; + __u64 __reserved_4; }; /* -- cgit v1.2.3-70-g09d2 From 6ab423e0eaca827fbd201ca4ae7d4f8573a366b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 14:45:27 +0200 Subject: perf_counter: Propagate inheritance failures down the fork() path Fail fork() when we fail inheritance for some reason (-ENOMEM most likely). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525124600.324656474@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- kernel/fork.c | 6 +++++- kernel/perf_counter.c | 20 ++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b1f2bac09f9..d3e85de9bf1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -566,7 +566,7 @@ extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *child); +extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); @@ -631,7 +631,7 @@ perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *child) { } +static inline int perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } diff --git a/kernel/fork.c b/kernel/fork.c index 675e01e9072..c07c3335cea 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1095,7 +1095,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - perf_counter_init_task(p); + + retval = perf_counter_init_task(p); + if (retval) + goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; @@ -1295,6 +1298,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: + perf_counter_exit_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 217dbcce2eb..7a7a144870e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3434,18 +3434,23 @@ again: /* * Initialize the perf_counter context in task_struct */ -void perf_counter_init_task(struct task_struct *child) +int perf_counter_init_task(struct task_struct *child) { struct perf_counter_context *child_ctx, *parent_ctx; struct perf_counter *counter; struct task_struct *parent = current; int inherited_all = 1; + int ret = 0; child->perf_counter_ctxp = NULL; mutex_init(&child->perf_counter_mutex); INIT_LIST_HEAD(&child->perf_counter_list); + parent_ctx = parent->perf_counter_ctxp; + if (likely(!parent_ctx || !parent_ctx->nr_counters)) + return 0; + /* * This is executed from the parent task context, so inherit * counters that have been marked for cloning. @@ -3454,11 +3459,7 @@ void perf_counter_init_task(struct task_struct *child) child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); if (!child_ctx) - return; - - parent_ctx = parent->perf_counter_ctxp; - if (likely(!parent_ctx || !parent_ctx->nr_counters)) - return; + return -ENOMEM; __perf_counter_init_context(child_ctx, child); child->perf_counter_ctxp = child_ctx; @@ -3482,8 +3483,9 @@ void perf_counter_init_task(struct task_struct *child) continue; } - if (inherit_group(counter, parent, - parent_ctx, child, child_ctx)) { + ret = inherit_group(counter, parent, parent_ctx, + child, child_ctx); + if (ret) { inherited_all = 0; break; } @@ -3505,6 +3507,8 @@ void perf_counter_init_task(struct task_struct *child) } mutex_unlock(&parent_ctx->mutex); + + return ret; } static void __cpuinit perf_counter_init_cpu(int cpu) -- cgit v1.2.3-70-g09d2 From 48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 17:39:04 +0200 Subject: perf_counter: x86: Remove interrupt throttle remove the x86 specific interrupt throttle Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.616671838@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 2 -- arch/x86/kernel/cpu/perf_counter.c | 47 ++++---------------------------------- include/linux/perf_counter.h | 2 -- 3 files changed, 5 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b4f64402a82..89b63b5fad3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -763,8 +763,6 @@ static void local_apic_timer_interrupt(void) inc_irq_stat(apic_timer_irqs); evt->event_handler(evt); - - perf_counter_unthrottle(); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index c14437faf5d..8c8177f859f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -718,11 +718,6 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter) intel_pmu_enable_counter(hwc, idx); } -/* - * Maximum interrupt frequency of 100KHz per CPU - */ -#define PERFMON_MAX_INTERRUPTS (100000/HZ) - /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: @@ -775,15 +770,14 @@ again: if (status) goto again; - if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS) - perf_enable(); + perf_enable(); return 1; } static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { - int cpu, idx, throttle = 0, handled = 0; + int cpu, idx, handled = 0; struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; @@ -792,16 +786,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); - if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) { - throttle = 1; - __perf_disable(); - cpuc->enabled = 0; - barrier(); - } - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - int disable = 0; - if (!test_bit(idx, cpuc->active_mask)) continue; @@ -809,45 +794,23 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) hwc = &counter->hw; if (counter->hw_event.nmi != nmi) - goto next; + continue; val = x86_perf_counter_update(counter, hwc, idx); if (val & (1ULL << (x86_pmu.counter_bits - 1))) - goto next; + continue; /* counter overflow */ x86_perf_counter_set_period(counter, hwc, idx); handled = 1; inc_irq_stat(apic_perf_irqs); - disable = perf_counter_overflow(counter, nmi, regs, 0); - -next: - if (disable || throttle) + if (perf_counter_overflow(counter, nmi, regs, 0)) amd_pmu_disable_counter(hwc, idx); } return handled; } -void perf_counter_unthrottle(void) -{ - struct cpu_hw_counters *cpuc; - - if (!x86_pmu_initialized()) - return; - - cpuc = &__get_cpu_var(cpu_hw_counters); - if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { - /* - * Clear them before re-enabling irqs/NMIs again: - */ - cpuc->interrupts = 0; - perf_enable(); - } else { - cpuc->interrupts = 0; - } -} - void smp_perf_counter_interrupt(struct pt_regs *regs) { irq_enter(); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d3e85de9bf1..0c160be2078 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -570,7 +570,6 @@ extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); -extern void perf_counter_unthrottle(void); extern void __perf_disable(void); extern bool __perf_enable(void); extern void perf_disable(void); @@ -635,7 +634,6 @@ static inline int perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } -static inline void perf_counter_unthrottle(void) { } static inline void perf_disable(void) { } static inline void perf_enable(void) { } static inline int perf_counter_task_disable(void) { return -EINVAL; } -- cgit v1.2.3-70-g09d2 From a78ac3258782f3e64cb40beb5990808e1febcc0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 17:39:05 +0200 Subject: perf_counter: Generic per counter interrupt throttle Introduce a generic per counter interrupt throttle. This uses the perf_counter_overflow() quick disable to throttle a specific counter when its going too fast when a pmu->unthrottle() method is provided which can undo the quick disable. Power needs to implement both the quick disable and the unthrottle method. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.703093461@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 13 +++++++++ include/linux/perf_counter.h | 11 +++++++ kernel/perf_counter.c | 59 +++++++++++++++++++++++++++++++++++--- kernel/sysctl.c | 8 ++++++ 4 files changed, 87 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 8c8177f859f..c4b543d1a86 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -623,6 +623,18 @@ try_generic: return 0; } +static void x86_pmu_unthrottle(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct hw_perf_counter *hwc = &counter->hw; + + if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || + cpuc->counters[hwc->idx] != counter)) + return; + + x86_pmu.enable(hwc, hwc->idx); +} + void perf_counter_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; @@ -1038,6 +1050,7 @@ static const struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, }; const struct pmu *hw_perf_counter_init(struct perf_counter *counter) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0c160be2078..e3a7585d3e4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -266,6 +266,15 @@ enum perf_event_type { */ PERF_EVENT_PERIOD = 4, + /* + * struct { + * struct perf_event_header header; + * u64 time; + * }; + */ + PERF_EVENT_THROTTLE = 5, + PERF_EVENT_UNTHROTTLE = 6, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -367,6 +376,7 @@ struct pmu { int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); + void (*unthrottle) (struct perf_counter *counter); }; /** @@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_priv; extern int sysctl_perf_counter_mlock; +extern int sysctl_perf_counter_limit; extern void perf_counter_init(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 14b1fe98483..ec9c4007a7f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ +int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */ /* * Lock for (sysadmin-configurable) counter reservations: @@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) __perf_counter_sched_in(ctx, cpuctx, cpu); } +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_counter *counter, int enable); static void perf_log_period(struct perf_counter *counter, u64 period); static void perf_adjust_freq(struct perf_counter_context *ctx) { struct perf_counter *counter; - u64 irq_period; + u64 interrupts, irq_period; u64 events, period; s64 delta; @@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) if (counter->state != PERF_COUNTER_STATE_ACTIVE) continue; + interrupts = counter->hw.interrupts; + counter->hw.interrupts = 0; + + if (interrupts == MAX_INTERRUPTS) { + perf_log_throttle(counter, 1); + counter->pmu->unthrottle(counter); + interrupts = 2*sysctl_perf_counter_limit/HZ; + } + if (!counter->hw_event.freq || !counter->hw_event.irq_freq) continue; - events = HZ * counter->hw.interrupts * counter->hw.irq_period; + events = HZ * interrupts * counter->hw.irq_period; period = div64_u64(events, counter->hw_event.irq_freq); delta = (s64)(1 + period - counter->hw.irq_period); @@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) perf_log_period(counter, irq_period); counter->hw.irq_period = irq_period; - counter->hw.interrupts = 0; } spin_unlock(&ctx->lock); } @@ -2543,6 +2555,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period) perf_output_end(&handle); } +/* + * IRQ throttle logging + */ + +static void perf_log_throttle(struct perf_counter *counter, int enable) +{ + struct perf_output_handle handle; + int ret; + + struct { + struct perf_event_header header; + u64 time; + } throttle_event = { + .header = { + .type = PERF_EVENT_THROTTLE + 1, + .misc = 0, + .size = sizeof(throttle_event), + }, + .time = sched_clock(), + }; + + ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0); + if (ret) + return; + + perf_output_put(&handle, throttle_event); + perf_output_end(&handle); +} + /* * Generic counter overflow handling. */ @@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { int events = atomic_read(&counter->event_limit); + int throttle = counter->pmu->unthrottle != NULL; int ret = 0; - counter->hw.interrupts++; + if (!throttle) { + counter->hw.interrupts++; + } else if (counter->hw.interrupts != MAX_INTERRUPTS) { + counter->hw.interrupts++; + if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) { + counter->hw.interrupts = MAX_INTERRUPTS; + perf_log_throttle(counter, 0); + ret = 1; + } + } /* * XXX event_limit might not quite work as expected on inherited diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3cb1849f598..0c4bf863afa 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "perf_counter_int_limit", + .data = &sysctl_perf_counter_limit, + .maxlen = sizeof(sysctl_perf_counter_limit), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif /* * NOTE: do not add new entries to this table unless you have read -- cgit v1.2.3-70-g09d2 From 0127c3ea082ee9f1034789b978dfc7fd83254617 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2009 22:03:26 +0200 Subject: perf_counter: fix warning & lockup - remove bogus warning - fix wakeup from NMI path lockup - also fix up whitespace noise in perf_counter.h Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525153931.703093461@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 78 ++++++++++++++++++++++---------------------- kernel/perf_counter.c | 4 +-- 2 files changed, 40 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e3a7585d3e4..2b16ed37b74 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,7 +73,7 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -#define __PERF_COUNTER_MASK(name) \ +#define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -98,14 +98,14 @@ enum sw_event_ids { * in the overflow packets. */ enum perf_counter_record_format { - PERF_RECORD_IP = 1U << 0, - PERF_RECORD_TID = 1U << 1, - PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_ADDR = 1U << 3, - PERF_RECORD_GROUP = 1U << 4, - PERF_RECORD_CALLCHAIN = 1U << 5, - PERF_RECORD_CONFIG = 1U << 6, - PERF_RECORD_CPU = 1U << 7, + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_TIME = 1U << 2, + PERF_RECORD_ADDR = 1U << 3, + PERF_RECORD_GROUP = 1U << 4, + PERF_RECORD_CALLCHAIN = 1U << 5, + PERF_RECORD_CONFIG = 1U << 6, + PERF_RECORD_CPU = 1U << 7, }; /* @@ -235,13 +235,13 @@ enum perf_event_type { * correlate userspace IPs to code. They have the following structure: * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; * }; */ PERF_EVENT_MMAP = 1, @@ -249,27 +249,27 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u32 pid, tid; - * char comm[]; + * u32 pid, tid; + * char comm[]; * }; */ PERF_EVENT_COMM = 3, /* * struct { - * struct perf_event_header header; - * u64 time; - * u64 irq_period; + * struct perf_event_header header; + * u64 time; + * u64 irq_period; * }; */ PERF_EVENT_PERIOD = 4, /* * struct { - * struct perf_event_header header; - * u64 time; + * struct perf_event_header header; + * u64 time; * }; */ PERF_EVENT_THROTTLE = 5, @@ -280,23 +280,23 @@ enum perf_event_type { * will be PERF_RECORD_* * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * { u64 config; } && PERF_RECORD_CONFIG - * { u32 cpu, res; } && PERF_RECORD_CPU + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR + * { u64 config; } && PERF_RECORD_CONFIG + * { u32 cpu, res; } && PERF_RECORD_CPU * - * { u64 nr; - * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 nr; + * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP * - * { u16 nr, - * hv, - * kernel, - * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN * }; */ }; @@ -406,7 +406,7 @@ struct perf_mmap_data { atomic_t wakeup; /* needs a wakeup */ struct perf_counter_mmap_page *user_page; - void *data_pages[0]; + void *data_pages[0]; }; struct perf_pending_entry { @@ -422,7 +422,7 @@ struct perf_counter { struct list_head list_entry; struct list_head event_entry; struct list_head sibling_list; - int nr_siblings; + int nr_siblings; struct perf_counter *group_leader; const struct pmu *pmu; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ec9c4007a7f..070f92d3232 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2576,7 +2576,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) .time = sched_clock(), }; - ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0); + ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); if (ret) return; @@ -3449,8 +3449,6 @@ void perf_counter_exit_task(struct task_struct *child) struct perf_counter_context *child_ctx; unsigned long flags; - WARN_ON_ONCE(child != current); - child_ctx = child->perf_counter_ctxp; if (likely(!child_ctx)) -- cgit v1.2.3-70-g09d2 From b0aae68cc5508f3c2fbf728988c954db4c8b8a53 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 21 May 2009 13:59:18 +0800 Subject: tracing/events: change the type of __str_loc_item to unsigned short When defining a dynamic size string, we add __str_loc_##item to the trace entry, and it stores the location of the actual string in entry->_str_data[] 'unsigned short' should be sufficient to store this information, thus we save 2 bytes per dyn-size string in the ring buffer. [ Impact: reduce memory occupied by dyn-size strings in ring buffer ] Signed-off-by: Li Zefan Cc: Steven Rostedt LKML-Reference: <4A14EDB6.2050507@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index edb02bc9f8f..b5ff2e8229e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -25,7 +25,7 @@ #define __field(type, item) type item; #undef __string -#define __string(item, src) int __str_loc_##item; +#define __string(item, src) unsigned short __str_loc_##item; #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -- cgit v1.2.3-70-g09d2 From 08baf561083bc27a953aa087dd8a664bb2b88e8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 May 2009 22:58:01 -0700 Subject: net: txq_trans_update() helper We would like to get rid of netdev->trans_start = jiffies; that about all net drivers have to use in their start_xmit() function, and use txq->trans_start instead. This can be done generically in core network, as suggested by David. Some devices, (particularly loopback) dont need trans_start update, because they dont have transmit watchdog. We could add a new device flag, or rely on fact that txq->tran_start can be updated is txq->xmit_lock_owner is different than -1. Use a helper function to hide our choice. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ net/core/dev.c | 3 +++ net/core/netpoll.c | 5 ++++- net/core/pktgen.c | 1 + net/sched/sch_teql.c | 1 + 5 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae3c2099a04..586b71f0358 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1674,6 +1674,12 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) spin_unlock_bh(&txq->_xmit_lock); } +static inline void txq_trans_update(struct netdev_queue *txq) +{ + if (txq->xmit_lock_owner != -1) + txq->trans_start = jiffies; +} + /** * netif_tx_lock - grab network device transmit lock * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index 241613f6dd2..5eb3e48ab31 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1698,6 +1698,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->dst = NULL; } rc = ops->ndo_start_xmit(skb, dev); + if (rc == 0) + txq_trans_update(txq); /* * TODO: if skb_orphan() was called by * dev->hard_start_xmit() (for example, the unmodified @@ -1727,6 +1729,7 @@ gso: skb->next = nskb; return rc; } + txq_trans_update(txq); if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 67b4f3e3d4a..7ab31a7576a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -302,8 +302,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (__netif_tx_trylock(txq)) { - if (!netif_tx_queue_stopped(txq)) + if (!netif_tx_queue_stopped(txq)) { status = ops->ndo_start_xmit(skb, dev); + if (status == NETDEV_TX_OK) + txq_trans_update(txq); + } __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0666a827bc6..b8ccd3c88d6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3438,6 +3438,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) retry_now: ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { + txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 428a5ef5b94..a886496bdc3 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -308,6 +308,7 @@ restart: if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { + txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3-70-g09d2 From b90cf6681f4f6263920616e7ca2fd09130e4143a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 5 Apr 2009 08:23:44 -0700 Subject: [MTD] Remove option for add_mtd_partitions() to not register partitions. This breaks the dilnetpc map driver, but it could be fixed not to use that option. We want to simplify the partition handling, and this is a step towards that. Remove superfluous 'index' field from private struct mtd_part too, while we're at it. Signed-off-by: David Woodhouse --- drivers/mtd/mtdpart.c | 18 ++++-------------- include/linux/mtd/partitions.h | 1 - 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 63d1cd2c17b..349fcbe5cc0 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -27,9 +27,7 @@ struct mtd_part { struct mtd_info mtd; struct mtd_info *master; uint64_t offset; - int index; struct list_head list; - int registered; }; /* @@ -321,8 +319,7 @@ int del_mtd_partitions(struct mtd_info *master) list_for_each_entry_safe(slave, next, &mtd_partitions, list) if (slave->master == master) { list_del(&slave->list); - if (slave->registered) - del_mtd_device(&slave->mtd); + del_mtd_device(&slave->mtd); kfree(slave); } @@ -412,7 +409,6 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, slave->mtd.erase = part_erase; slave->master = master; slave->offset = part->offset; - slave->index = partno; if (slave->offset == MTDPART_OFS_APPEND) slave->offset = cur_offset; @@ -500,15 +496,9 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, } out_register: - if (part->mtdp) { - /* store the object pointer (caller may or may not register it*/ - *part->mtdp = &slave->mtd; - slave->registered = 0; - } else { - /* register our partition */ - add_mtd_device(&slave->mtd); - slave->registered = 1; - } + /* register our partition */ + add_mtd_device(&slave->mtd); + return slave; } diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 7535a74083b..af6dcb992bc 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -40,7 +40,6 @@ struct mtd_partition { uint64_t offset; /* offset within the master MTD space */ uint32_t mask_flags; /* master MTD flags to mask out for this partition */ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ - struct mtd_info **mtdp; /* pointer to store the MTD object */ }; #define MTDPART_OFS_NXTBLK (-2) -- cgit v1.2.3-70-g09d2 From be74b73a57645cc253d881ab0c1014eb64b9cf22 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 26 May 2009 20:25:22 +0200 Subject: tracing: add __print_flags for events Developers have been asking for the ability in the ftrace event tracer to display names of bits in a flags variable. Instead of printing out c2, it would be easier to read FOO|BAR|GOO, assuming that FOO is bit 1, BAR is bit 6 and GOO is bit 7. Some examples where this would be useful are the state flags in a context switch, kmalloc flags, and even permision flags in accessing files. [ v2 changes include: Frederic Weisbecker's idea of using a mask instead of bits, thus we can output GFP_KERNEL instead of GPF_WAIT|GFP_IO|GFP_FS. Li Zefan's idea of allowing the caller of __print_flags to add their own delimiter (or no delimiter) where we can get for file permissions rwx instead of r|w|x. ] [ v3 changes: Christoph Hellwig's idea of using an array instead of va_args. ] [ Impact: better displaying of flags in trace output ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 13 ++++++++++++- include/trace/ftrace.h | 14 ++++++++++++++ kernel/trace/trace_output.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bae51ddfabd..4b58cf1a11c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -3,12 +3,23 @@ #include #include - +#include struct trace_array; struct tracer; struct dentry; +DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b5ff2e8229e..22c94719c56 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -87,6 +87,7 @@ * struct trace_seq *s = &iter->seq; * struct ftrace_raw_ *field; <-- defined in stage 1 * struct trace_entry *entry; + * struct trace_seq *p; * int ret; * * entry = iter->ent; @@ -98,7 +99,9 @@ * * field = (typeof(field))entry; * + * p = get_cpu_var(ftrace_event_seq); * ret = trace_seq_printf(s, "\n"); + * put_cpu(); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; * @@ -119,6 +122,14 @@ #undef __get_str #define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags flags[] = \ + { flag_array, { -1, NULL }}; \ + ftrace_print_flags_seq(p, delim, flag, flags); \ + }) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ @@ -127,6 +138,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ struct trace_entry *entry; \ + struct trace_seq *p; \ int ret; \ \ entry = iter->ent; \ @@ -138,7 +150,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ + p = &get_cpu_var(ftrace_event_seq); \ ret = trace_seq_printf(s, #call ": " print); \ + put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 7136420603a..a4840c260c8 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -15,6 +15,9 @@ #define EVENT_HASHSIZE 128 static DECLARE_RWSEM(trace_event_mutex); + +DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); + static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; @@ -212,6 +215,42 @@ int trace_seq_path(struct trace_seq *s, struct path *path) return 0; } +const char * +ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array) +{ + unsigned long mask; + const char *str; + int i; + + trace_seq_init(p); + + for (i = 0; flag_array[i].name && flags; i++) { + + mask = flag_array[i].mask; + if ((flags & mask) != mask) + continue; + + str = flag_array[i].name; + flags &= ~mask; + if (p->len && delim) + trace_seq_puts(p, delim); + trace_seq_puts(p, str); + } + + /* check for left over flags */ + if (flags) { + if (p->len && delim) + trace_seq_puts(p, delim); + trace_seq_printf(p, "0x%lx", flags); + } + + trace_seq_putc(p, 0); + + return p->buffer; +} + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { -- cgit v1.2.3-70-g09d2 From 937cdb9db7f59278d0cb1582e6e64e3dfd73b4fc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 15 May 2009 10:51:13 -0400 Subject: tracing: add previous task state info to sched switch event It is useful to see the state of a task that is being switched out. This patch adds the output of the state of the previous task in the context switch event. [ Impact: see state of switched out task in context switch ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/sched.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index dd4033cf5b0..24ab5bcff7b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -156,6 +156,7 @@ TRACE_EVENT(sched_switch, __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) + __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) @@ -165,13 +166,19 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; + __entry->prev_state = prev->state; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->prev_state ? + __print_flags(__entry->prev_state, "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "W" }) : "R", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- cgit v1.2.3-70-g09d2 From 62ba180e80f4194a498585ac0e4c07daa8ca08d1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 15 May 2009 16:16:30 -0400 Subject: tracing: add flag output for kmem events This patch changes the output for gfp_flags from being a simple hex value to the actual names. gfp_flags=GFP_ATOMIC instead of gfp_flags=00000020 And even gfp_flags=GFP_KERNEL instead of gfp_flags=000000d0 (Thanks to Frederic Weisbecker for pointing out that the first version had a bad order of GFP masks) [ Impact: more human readable output from tracer ] Acked-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/kmem.h | 53 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index c22c42f980b..9baba50d651 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -7,6 +7,43 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kmem +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. + */ +#define show_gfp_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ + {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ + {(unsigned long)__GFP_IO, "GFP_IO"}, \ + {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + ) : "GFP_NOWAIT" + TRACE_EVENT(kmalloc, TP_PROTO(unsigned long call_site, @@ -33,12 +70,12 @@ TRACE_EVENT(kmalloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags) + show_gfp_flags(__entry->gfp_flags)) ); TRACE_EVENT(kmem_cache_alloc, @@ -67,12 +104,12 @@ TRACE_EVENT(kmem_cache_alloc, __entry->gfp_flags = gfp_flags; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags) + show_gfp_flags(__entry->gfp_flags)) ); TRACE_EVENT(kmalloc_node, @@ -104,12 +141,12 @@ TRACE_EVENT(kmalloc_node, __entry->node = node; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags, + show_gfp_flags(__entry->gfp_flags), __entry->node) ); @@ -142,12 +179,12 @@ TRACE_EVENT(kmem_cache_alloc_node, __entry->node = node; ), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, - __entry->gfp_flags, + show_gfp_flags(__entry->gfp_flags), __entry->node) ); -- cgit v1.2.3-70-g09d2 From 0f4fc29dd68dfab9c6ddd5d087d34a5b6818cb00 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 May 2009 19:21:47 -0400 Subject: tracing: add __print_symbolic to trace events This patch adds __print_symbolic which is similar to __print_flags but works for an enumeration type instead. That is, there is only a one to one mapping between the values and the symbols. When a match is made, then it is printed, otherwise the hex value is outputed. [ Impact: add interface for showing symbol names in events ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 3 +++ include/trace/ftrace.h | 8 ++++++++ kernel/trace/trace_output.c | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4b58cf1a11c..bbf40f624fc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -20,6 +20,9 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); +const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); + /* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 22c94719c56..87fc227c6fb 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -130,6 +130,14 @@ ftrace_print_flags_seq(p, delim, flag, flags); \ }) +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + ftrace_print_symbols_seq(p, value, symbols); \ + }) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index a4840c260c8..c12d95db2f5 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -251,6 +251,31 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, return p->buffer; } +const char * +ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array) +{ + int i; + + trace_seq_init(p); + + for (i = 0; symbol_array[i].name; i++) { + + if (val != symbol_array[i].mask) + continue; + + trace_seq_puts(p, symbol_array[i].name); + break; + } + + if (!p->len) + trace_seq_printf(p, "0x%lx", val); + + trace_seq_putc(p, 0); + + return p->buffer; +} + #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) { -- cgit v1.2.3-70-g09d2 From c2adae0970ca1db8adb92fb56ae3bcabd916e8bd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 May 2009 19:56:19 -0400 Subject: tracing: convert irq events to use __print_symbolic The recording of the names at trace time is inefficient. This patch implements the softirq event recording to only record the vector and then use the __print_symbolic interface to print out the names. [ Impact: faster recording of softirq events ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/events/irq.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 32a9f7ef432..683fb36a994 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,6 +7,19 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq +#define softirq_name(sirq) { sirq, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI_SOFTIRQ), \ + softirq_name(TIMER_SOFTIRQ), \ + softirq_name(NET_TX_SOFTIRQ), \ + softirq_name(NET_RX_SOFTIRQ), \ + softirq_name(BLOCK_SOFTIRQ), \ + softirq_name(TASKLET_SOFTIRQ), \ + softirq_name(SCHED_SOFTIRQ), \ + softirq_name(HRTIMER_SOFTIRQ), \ + softirq_name(RCU_SOFTIRQ)) + /** * irq_handler_entry - called immediately before the irq action handler * @irq: irq number @@ -87,15 +100,14 @@ TRACE_EVENT(softirq_entry, TP_STRUCT__entry( __field( int, vec ) - __string( name, softirq_to_name[h-vec] ) ), TP_fast_assign( __entry->vec = (int)(h - vec); - __assign_str(name, softirq_to_name[h-vec]); ), - TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) ); /** @@ -117,15 +129,14 @@ TRACE_EVENT(softirq_exit, TP_STRUCT__entry( __field( int, vec ) - __string( name, softirq_to_name[h-vec] ) ), TP_fast_assign( __entry->vec = (int)(h - vec); - __assign_str(name, softirq_to_name[h-vec]); ), - TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3-70-g09d2 From 8e401eccd3a62fb57f117bb09b7c1fc70ab19e8c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 May 2009 21:16:25 -0700 Subject: phy: Eliminate references to BUS_ID_SIZE. Just use the constant 20 to keep things working. If someone is so motivated, this can be converted over to dynamic strings. I tried and it's a lot of work. But for now this is good enough. Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index c216e4e503b..b1368b8f657 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -79,7 +79,7 @@ typedef enum { * Need to be a little smaller than phydev->dev.bus_id to leave room * for the ":%02x" */ -#define MII_BUS_ID_SIZE (BUS_ID_SIZE - 3) +#define MII_BUS_ID_SIZE (20 - 3) /* * The Bus class for PHYs. Devices which provide access to @@ -407,7 +407,7 @@ struct phy_driver { /* A Structure for boards to register fixups with the PHY Lib */ struct phy_fixup { struct list_head list; - char bus_id[BUS_ID_SIZE]; + char bus_id[20]; u32 phy_uid; u32 phy_uid_mask; int (*run)(struct phy_device *phydev); -- cgit v1.2.3-70-g09d2 From b74be6119e9e38390395f08767b7c84de9023b38 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Apr 2009 10:20:23 +0800 Subject: ACPICA: Update error/warning interfaces Moved the module name and line number to the end of the message. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/utmisc.c | 20 +++++++++----------- include/acpi/acpixf.h | 5 ++++- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index 1c9e250caef..fbe782348b0 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -1033,11 +1033,12 @@ acpi_error(const char *module_name, u32 line_number, const char *format, ...) { va_list args; - acpi_os_printf("ACPI Error (%s-%04d): ", module_name, line_number); + acpi_os_printf("ACPI Error: "); va_start(args, format); acpi_os_vprintf(format, args); - acpi_os_printf(" [%X]\n", ACPI_CA_VERSION); + acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name, + line_number); va_end(args); } @@ -1047,12 +1048,12 @@ acpi_exception(const char *module_name, { va_list args; - acpi_os_printf("ACPI Exception (%s-%04d): %s, ", module_name, - line_number, acpi_format_exception(status)); + acpi_os_printf("ACPI Exception: %s, ", acpi_format_exception(status)); va_start(args, format); acpi_os_vprintf(format, args); - acpi_os_printf(" [%X]\n", ACPI_CA_VERSION); + acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name, + line_number); va_end(args); } @@ -1061,11 +1062,12 @@ acpi_warning(const char *module_name, u32 line_number, const char *format, ...) { va_list args; - acpi_os_printf("ACPI Warning (%s-%04d): ", module_name, line_number); + acpi_os_printf("ACPI Warning: "); va_start(args, format); acpi_os_vprintf(format, args); - acpi_os_printf(" [%X]\n", ACPI_CA_VERSION); + acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name, + line_number); va_end(args); } @@ -1074,10 +1076,6 @@ acpi_info(const char *module_name, u32 line_number, const char *format, ...) { va_list args; - /* - * Removed module_name, line_number, and acpica version, not needed - * for info output - */ acpi_os_printf("ACPI: "); va_start(args, format); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 4db89e98535..a868df6fcef 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -375,7 +375,7 @@ acpi_status acpi_leave_sleep_state_prep(u8 sleep_state); acpi_status acpi_leave_sleep_state(u8 sleep_state); /* - * Debug output + * Error/Warning output */ void ACPI_INTERNAL_VAR_XFACE acpi_error(const char *module_name, @@ -394,6 +394,9 @@ void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *module_name, u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); +/* + * Debug output + */ #ifdef ACPI_DEBUG_OUTPUT void ACPI_INTERNAL_VAR_XFACE -- cgit v1.2.3-70-g09d2 From 8eb7b2477c4e4fec4788605e4edb5f7acafb59ff Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Apr 2009 10:28:22 +0800 Subject: ACPICA: Fix a few warnings for gcc 3.4.4 Mostly for acpiexec, one in the core subsystem. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/hwregs.c | 4 ++-- include/acpi/platform/acgcc.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 7b2fb602b5c..23d5505cb1f 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -81,9 +81,9 @@ acpi_status acpi_hw_clear_acpi_status(void) ACPI_FUNCTION_TRACE(hw_clear_acpi_status); - ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %0llX\n", + ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %8.8X%8.8X\n", ACPI_BITMASK_ALL_FIXED_STATUS, - acpi_gbl_xpm1a_status.address)); + ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 8e2cdc57b19..935c5d7fc86 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -62,4 +62,8 @@ */ #define ACPI_UNUSED_VAR __attribute__ ((unused)) +#ifdef _ANSI +#define inline +#endif + #endif /* __ACGCC_H__ */ -- cgit v1.2.3-70-g09d2 From 35d7c1cfe5262480d3d8e6ccd7e4caf3a9c8ab39 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Apr 2009 12:41:27 +0800 Subject: ACPICA: Cleanup byte/word/dword extraction macros, fix possible warnings Removed unnecessary masking. For the 64-bit macros, removed the structure overlay. Fixes aliasing warnings seen with gcc 4+ compilers. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actypes.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index f555d927f7c..37ba576d06e 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -429,20 +429,12 @@ typedef unsigned long long acpi_integer; /* Data manipulation */ -#define ACPI_LOWORD(l) ((u16)(u32)(l)) -#define ACPI_HIWORD(l) ((u16)((((u32)(l)) >> 16) & 0xFFFF)) -#define ACPI_LOBYTE(l) ((u8)(u16)(l)) -#define ACPI_HIBYTE(l) ((u8)((((u16)(l)) >> 8) & 0xFF)) - -/* Full 64-bit integer must be available on both 32-bit and 64-bit platforms */ - -struct acpi_integer_overlay { - u32 lo_dword; - u32 hi_dword; -}; - -#define ACPI_LODWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->lo_dword) -#define ACPI_HIDWORD(integer) (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->hi_dword) +#define ACPI_LOBYTE(integer) ((u8) (u16)(integer)) +#define ACPI_HIBYTE(integer) ((u8) (((u16)(integer)) >> 8)) +#define ACPI_LOWORD(integer) ((u16) (u32)(integer)) +#define ACPI_HIWORD(integer) ((u16)(((u32)(integer)) >> 16)) +#define ACPI_LODWORD(integer64) ((u32) (u64)(integer64)) +#define ACPI_HIDWORD(integer64) ((u32)(((u64)(integer64)) >> 32)) #define ACPI_SET_BIT(target,bit) ((target) |= (bit)) #define ACPI_CLEAR_BIT(target,bit) ((target) &= ~(bit)) -- cgit v1.2.3-70-g09d2 From e0c437bcca6926b541c738b5c64445654750b365 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Apr 2009 13:39:47 +0800 Subject: ACPICA: Linux OSL: cleanup/update/merge Merge the OSL with the actual file used by Linux, so that the file does not require patching when integrated with Linux. General cleanup and some restructuring. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 63 +++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 6d49b2a498c..fcb8e4b159b 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -1,11 +1,11 @@ /****************************************************************************** * - * Name: aclinux.h - OS specific defines, etc. + * Name: aclinux.h - OS specific defines, etc. for Linux * *****************************************************************************/ /* - * Copyright (C) 2000 - 2008, Intel Corp. + * Copyright (C) 2000 - 2009, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,10 +44,13 @@ #ifndef __ACLINUX_H__ #define __ACLINUX_H__ +/* Common (in-kernel/user-space) ACPICA configuration */ + #define ACPI_USE_SYSTEM_CLIBRARY #define ACPI_USE_DO_WHILE_0 #define ACPI_MUTEX_TYPE ACPI_BINARY_SEMAPHORE + #ifdef __KERNEL__ #include @@ -63,15 +66,18 @@ #include #include -/* Host-dependent types and defines */ +/* Host-dependent types and defines for in-kernel ACPICA */ #define ACPI_MACHINE_WIDTH BITS_PER_LONG -#define acpi_cache_t struct kmem_cache -#define acpi_spinlock spinlock_t * #define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol); #define strtoul simple_strtoul -#else /* !__KERNEL__ */ +#define acpi_cache_t struct kmem_cache +#define acpi_spinlock spinlock_t * +#define acpi_cpu_flags unsigned long +#define acpi_thread_id struct task_struct * + +#else /* !__KERNEL__ */ #include #include @@ -79,6 +85,11 @@ #include #include +/* Host-dependent types and defines for user-space ACPICA */ + +#define ACPI_FLUSH_CPU_CACHE() +#define acpi_thread_id pthread_t + #if defined(__ia64__) || defined(__x86_64__) #define ACPI_MACHINE_WIDTH 64 #define COMPILER_DEPENDENT_INT64 long @@ -94,17 +105,17 @@ #define __cdecl #endif -#define ACPI_FLUSH_CPU_CACHE() -#endif /* __KERNEL__ */ +#endif /* __KERNEL__ */ /* Linux uses GCC */ #include "acgcc.h" -#define acpi_cpu_flags unsigned long - -#define acpi_thread_id struct task_struct * +#ifdef __KERNEL__ +/* + * Overrides for in-kernel ACPICA + */ static inline acpi_thread_id acpi_os_get_thread_id(void) { return current; @@ -119,30 +130,32 @@ static inline acpi_thread_id acpi_os_get_thread_id(void) #include static inline void *acpi_os_allocate(acpi_size size) { - return kmalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); + return kmalloc(size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); } + static inline void *acpi_os_allocate_zeroed(acpi_size size) { - return kzalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); + return kzalloc(size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); } static inline void *acpi_os_acquire_object(acpi_cache_t * cache) { return kmem_cache_zalloc(cache, - irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); + irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); } -#define ACPI_ALLOCATE(a) acpi_os_allocate(a) -#define ACPI_ALLOCATE_ZEROED(a) acpi_os_allocate_zeroed(a) -#define ACPI_FREE(a) kfree(a) +#define ACPI_ALLOCATE(a) acpi_os_allocate(a) +#define ACPI_ALLOCATE_ZEROED(a) acpi_os_allocate_zeroed(a) +#define ACPI_FREE(a) kfree(a) -/* - * We need to show where it is safe to preempt execution of ACPICA - */ -#define ACPI_PREEMPTION_POINT() \ - do { \ - if (!irqs_disabled()) \ - cond_resched(); \ +/* Used within ACPICA to show where it is safe to preempt execution */ + +#define ACPI_PREEMPTION_POINT() \ + do { \ + if (!irqs_disabled()) \ + cond_resched(); \ } while (0) -#endif /* __ACLINUX_H__ */ +#endif /* __KERNEL__ */ + +#endif /* __ACLINUX_H__ */ -- cgit v1.2.3-70-g09d2 From ba9e2ae443feb7231d9631ea0f62b63e26cfb9b1 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 24 Apr 2009 10:43:43 +0800 Subject: ACPICA: Update version to 20090422. Version 20090422. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index a868df6fcef..60b76a3fb3a 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090320 +#define ACPI_CA_VERSION 0x20090422 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3-70-g09d2 From b2f7ddcfcb9c2436896cb339a7ff70245648f033 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 21 May 2009 10:42:09 +0800 Subject: ACPICA: New: AcpiInstallMethod - install a single control method This interface enables the override or creation of a single control method. Useful to repair a bug or install a missing method. Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- drivers/acpi/acpica/aclocal.h | 1 + drivers/acpi/acpica/amlcode.h | 2 +- drivers/acpi/acpica/excreate.c | 2 +- drivers/acpi/acpica/exdump.c | 6 +- drivers/acpi/acpica/nsobject.c | 9 +++ drivers/acpi/acpica/nsxfname.c | 150 +++++++++++++++++++++++++++++++++++++++++ include/acpi/acpixf.h | 2 + 7 files changed, 168 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 2ec394a328e..882b4b55867 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -205,6 +205,7 @@ struct acpi_namespace_node { #define ANOBJ_METHOD_LOCAL 0x08 /* Node is a method local */ #define ANOBJ_SUBTREE_HAS_INI 0x10 /* Used to optimize device initialization */ #define ANOBJ_EVALUATED 0x20 /* Set on first evaluation of node */ +#define ANOBJ_ALLOCATED_BUFFER 0x40 /* Method AML buffer is dynamic (install_method) */ #define ANOBJ_IS_EXTERNAL 0x08 /* i_aSL only: This object created via External() */ #define ANOBJ_METHOD_NO_RETVAL 0x10 /* i_aSL only: Method has no return value */ diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h index ff851c5df69..067f967eb38 100644 --- a/drivers/acpi/acpica/amlcode.h +++ b/drivers/acpi/acpica/amlcode.h @@ -483,7 +483,7 @@ typedef enum { #define AML_METHOD_ARG_COUNT 0x07 #define AML_METHOD_SERIALIZED 0x08 -#define AML_METHOD_SYNCH_LEVEL 0xF0 +#define AML_METHOD_SYNC_LEVEL 0xF0 /* METHOD_FLAGS_ARG_COUNT is not used internally, define additional flags */ diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c index a57ad2564ab..02b25d233d9 100644 --- a/drivers/acpi/acpica/excreate.c +++ b/drivers/acpi/acpica/excreate.c @@ -502,7 +502,7 @@ acpi_ex_create_method(u8 * aml_start, * ACPI 2.0: sync_level = sync_level in method declaration */ obj_desc->method.sync_level = (u8) - ((method_flags & AML_METHOD_SYNCH_LEVEL) >> 4); + ((method_flags & AML_METHOD_SYNC_LEVEL) >> 4); } /* Attach the new object to the method Node */ diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index 89d141fdae0..ec524614e70 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -120,9 +120,11 @@ static struct acpi_exdump_info acpi_ex_dump_event[2] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(event.os_semaphore), "OsSemaphore"} }; -static struct acpi_exdump_info acpi_ex_dump_method[8] = { +static struct acpi_exdump_info acpi_ex_dump_method[9] = { {ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_method), NULL}, - {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.param_count), "ParamCount"}, + {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.method_flags), "Method Flags"}, + {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.param_count), + "Parameter Count"}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.sync_level), "Sync Level"}, {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(method.mutex), "Mutex"}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.owner_id), "Owner Id"}, diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c index 3eb20bfda9d..60f3af08d28 100644 --- a/drivers/acpi/acpica/nsobject.c +++ b/drivers/acpi/acpica/nsobject.c @@ -213,6 +213,15 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node) return_VOID; } + if (node->flags & ANOBJ_ALLOCATED_BUFFER) { + + /* Free the dynamic aml buffer */ + + if (obj_desc->common.type == ACPI_TYPE_METHOD) { + ACPI_FREE(obj_desc->method.aml_start); + } + } + /* Clear the entry in all cases */ node->object = NULL; diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index 9589fea2499..f23593d6add 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -45,6 +45,8 @@ #include #include "accommon.h" #include "acnamesp.h" +#include "acparser.h" +#include "amlcode.h" #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsxfname") @@ -358,3 +360,151 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) } ACPI_EXPORT_SYMBOL(acpi_get_object_info) + +/****************************************************************************** + * + * FUNCTION: acpi_install_method + * + * PARAMETERS: Buffer - An ACPI table containing one control method + * + * RETURN: Status + * + * DESCRIPTION: Install a control method into the namespace. If the method + * name already exists in the namespace, it is overwritten. The + * input buffer must contain a valid DSDT or SSDT containing a + * single control method. + * + ******************************************************************************/ +acpi_status acpi_install_method(u8 *buffer) +{ + struct acpi_table_header *table = + ACPI_CAST_PTR(struct acpi_table_header, buffer); + u8 *aml_buffer; + u8 *aml_start; + char *path; + struct acpi_namespace_node *node; + union acpi_operand_object *method_obj; + struct acpi_parse_state parser_state; + u32 aml_length; + u16 opcode; + u8 method_flags; + acpi_status status; + + /* Parameter validation */ + + if (!buffer) { + return AE_BAD_PARAMETER; + } + + /* Table must be a DSDT or SSDT */ + + if (!ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) && + !ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) { + return AE_BAD_HEADER; + } + + /* First AML opcode in the table must be a control method */ + + parser_state.aml = buffer + sizeof(struct acpi_table_header); + opcode = acpi_ps_peek_opcode(&parser_state); + if (opcode != AML_METHOD_OP) { + return AE_BAD_PARAMETER; + } + + /* Extract method information from the raw AML */ + + parser_state.aml += acpi_ps_get_opcode_size(opcode); + parser_state.pkg_end = acpi_ps_get_next_package_end(&parser_state); + path = acpi_ps_get_next_namestring(&parser_state); + method_flags = *parser_state.aml++; + aml_start = parser_state.aml; + aml_length = ACPI_PTR_DIFF(parser_state.pkg_end, aml_start); + + /* + * Allocate resources up-front. We don't want to have to delete a new + * node from the namespace if we cannot allocate memory. + */ + aml_buffer = ACPI_ALLOCATE(aml_length); + if (!aml_buffer) { + return AE_NO_MEMORY; + } + + method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD); + if (!method_obj) { + ACPI_FREE(aml_buffer); + return AE_NO_MEMORY; + } + + /* Lock namespace for acpi_ns_lookup, we may be creating a new node */ + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(status)) { + goto error_exit; + } + + /* The lookup either returns an existing node or creates a new one */ + + status = + acpi_ns_lookup(NULL, path, ACPI_TYPE_METHOD, ACPI_IMODE_LOAD_PASS1, + ACPI_NS_DONT_OPEN_SCOPE | ACPI_NS_ERROR_IF_FOUND, + NULL, &node); + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + + if (ACPI_FAILURE(status)) { /* ns_lookup */ + if (status != AE_ALREADY_EXISTS) { + goto error_exit; + } + + /* Node existed previously, make sure it is a method node */ + + if (node->type != ACPI_TYPE_METHOD) { + status = AE_TYPE; + goto error_exit; + } + } + + /* Copy the method AML to the local buffer */ + + ACPI_MEMCPY(aml_buffer, aml_start, aml_length); + + /* Initialize the method object with the new method's information */ + + method_obj->method.aml_start = aml_buffer; + method_obj->method.aml_length = aml_length; + + method_obj->method.param_count = (u8) + (method_flags & AML_METHOD_ARG_COUNT); + + method_obj->method.method_flags = (u8) + (method_flags & ~AML_METHOD_ARG_COUNT); + + if (method_flags & AML_METHOD_SERIALIZED) { + method_obj->method.sync_level = (u8) + ((method_flags & AML_METHOD_SYNC_LEVEL) >> 4); + } + + /* + * Now that it is complete, we can attach the new method object to + * the method Node (detaches/deletes any existing object) + */ + status = acpi_ns_attach_object(node, method_obj, ACPI_TYPE_METHOD); + + /* + * Flag indicates AML buffer is dynamic, must be deleted later. + * Must be set only after attach above. + */ + node->flags |= ANOBJ_ALLOCATED_BUFFER; + + /* Remove local reference to the method object */ + + acpi_ut_remove_reference(method_obj); + return status; + +error_exit: + + ACPI_FREE(aml_buffer); + ACPI_FREE(method_obj); + return status; +} +ACPI_EXPORT_SYMBOL(acpi_install_method) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 60b76a3fb3a..c3b08d3330e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -201,6 +201,8 @@ acpi_evaluate_object_typed(acpi_handle object, acpi_status acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer); +acpi_status acpi_install_method(u8 *buffer); + acpi_status acpi_get_next_object(acpi_object_type type, acpi_handle parent, -- cgit v1.2.3-70-g09d2 From d6a1cd4975a5ffaa21a961be04a469519edf50d6 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 21 May 2009 11:06:53 +0800 Subject: ACPICA: Update version to 20090521. Update version number. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c3b08d3330e..82ec6a3c050 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090422 +#define ACPI_CA_VERSION 0x20090521 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3-70-g09d2 From 78a478d0efd9e86e5345b436e130497b4e5846e8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:21 +0000 Subject: gro: Inline skb_gro_header and cache frag0 virtual address The function skb_gro_header is called four times per packet which quickly adds up at 10Gb/s. This patch inlines it to allow better optimisations. Some architectures perform multiplication for page_address, which is done by each skb_gro_header invocation. This patch caches that value in skb->cb to avoid the unnecessary multiplications. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 +++++++++++++++------- net/core/dev.c | 27 ++++++++++++--------------- 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 586b71f0358..61890ed0bcf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1008,6 +1008,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, void netif_napi_del(struct napi_struct *napi); struct napi_gro_cb { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; + /* This indicates where we are processing relative to skb->data. */ int data_offset; @@ -1107,9 +1110,9 @@ extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); #endif -extern void *skb_gro_header(struct sk_buff *skb, unsigned int hlen); extern int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); +extern void skb_gro_reset_offset(struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { @@ -1126,23 +1129,28 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void skb_gro_reset_offset(struct sk_buff *skb) +static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) { - NAPI_GRO_CB(skb)->data_offset = 0; + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (!NAPI_GRO_CB(skb)->frag0 || + unlikely(skb_shinfo(skb)->frags[0].size + skb_headlen(skb) < hlen)) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return NAPI_GRO_CB(skb)->frag0 + offset; } static inline void *skb_gro_mac_header(struct sk_buff *skb) { return skb_headlen(skb) ? skb_mac_header(skb) : - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; + NAPI_GRO_CB(skb)->frag0; } static inline void *skb_gro_network_header(struct sk_buff *skb) { return skb_headlen(skb) ? skb_network_header(skb) : - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset + skb_network_offset(skb); + NAPI_GRO_CB(skb)->frag0 + skb_network_offset(skb); } static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 5eb3e48ab31..bdb1a738193 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2390,21 +2390,6 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) -{ - unsigned int offset = skb_gro_offset(skb); - - hlen += offset; - if (unlikely(skb_headlen(skb) || - skb_shinfo(skb)->frags[0].size < hlen || - PageHighMem(skb_shinfo(skb)->frags[0].page))) - return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; - - return page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset + offset; -} -EXPORT_SYMBOL(skb_gro_header); - int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2520,6 +2505,18 @@ int napi_skb_finish(int ret, struct sk_buff *skb) } EXPORT_SYMBOL(napi_skb_finish); +void skb_gro_reset_offset(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + + if (!skb_headlen(skb) && !PageHighMem(skb_shinfo(skb)->frags[0].page)) + NAPI_GRO_CB(skb)->frag0 = + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; +} +EXPORT_SYMBOL(skb_gro_reset_offset); + int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); -- cgit v1.2.3-70-g09d2 From 78d3fd0b7de844a6dad56e9620fc9d2271b32ab9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:23 +0000 Subject: gro: Only use skb_gro_header for completely non-linear packets Currently skb_gro_header is used for packets which put the hardware header in skb->data with the rest in frags. Since the drivers that need this optimisation all provide completely non-linear packets, we can gain extra optimisations by only performing the frag0 optimisation for completely non-linear packets. In particular, we can simply test frag0 (instead of skb_headlen) to see whether the optimisation is in force. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++----- net/core/dev.c | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 61890ed0bcf..d1afc3b6485 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1135,22 +1135,23 @@ static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) hlen += offset; if (!NAPI_GRO_CB(skb)->frag0 || - unlikely(skb_shinfo(skb)->frags[0].size + skb_headlen(skb) < hlen)) + unlikely(skb_shinfo(skb)->frags[0].size < hlen)) { + NAPI_GRO_CB(skb)->frag0 = NULL; return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + } return NAPI_GRO_CB(skb)->frag0 + offset; } static inline void *skb_gro_mac_header(struct sk_buff *skb) { - return skb_headlen(skb) ? skb_mac_header(skb) : - NAPI_GRO_CB(skb)->frag0; + return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); } static inline void *skb_gro_network_header(struct sk_buff *skb) { - return skb_headlen(skb) ? skb_network_header(skb) : - NAPI_GRO_CB(skb)->frag0 + skb_network_offset(skb); + return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + + skb_network_offset(skb); } static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index bdb1a738193..f9d90c56b6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2510,7 +2510,8 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->data_offset = 0; NAPI_GRO_CB(skb)->frag0 = NULL; - if (!skb_headlen(skb) && !PageHighMem(skb_shinfo(skb)->frags[0].page)) + if (skb->mac_header == skb->tail && + !PageHighMem(skb_shinfo(skb)->frags[0].page)) NAPI_GRO_CB(skb)->frag0 = page_address(skb_shinfo(skb)->frags[0].page) + skb_shinfo(skb)->frags[0].page_offset; -- cgit v1.2.3-70-g09d2 From 7489594cb249aeb178287c9a43a9e4f366044259 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:27 +0000 Subject: gro: Optimise length comparison in skb_gro_header By caching frag0_len, we can avoid checking both frag0 and the length separately in skb_gro_header. This helps as skb_gro_header is called four times per packet which amounts to a few million times at 10Gb/s. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++-- net/core/dev.c | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d1afc3b6485..2e44a049be0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1011,6 +1011,9 @@ struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ void *frag0; + /* Length of frag0. */ + unsigned int frag0_len; + /* This indicates where we are processing relative to skb->data. */ int data_offset; @@ -1134,9 +1137,9 @@ static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) unsigned int offset = skb_gro_offset(skb); hlen += offset; - if (!NAPI_GRO_CB(skb)->frag0 || - unlikely(skb_shinfo(skb)->frags[0].size < hlen)) { + if (NAPI_GRO_CB(skb)->frag0_len < hlen) { NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index f9d90c56b6f..b1722a2d1fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2509,12 +2509,15 @@ void skb_gro_reset_offset(struct sk_buff *skb) { NAPI_GRO_CB(skb)->data_offset = 0; NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) + !PageHighMem(skb_shinfo(skb)->frags[0].page)) { NAPI_GRO_CB(skb)->frag0 = page_address(skb_shinfo(skb)->frags[0].page) + skb_shinfo(skb)->frags[0].page_offset; + NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + } } EXPORT_SYMBOL(skb_gro_reset_offset); -- cgit v1.2.3-70-g09d2 From a5b1cf288d4200506ab62fbb86cc81ace948a306 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:28 +0000 Subject: gro: Avoid unnecessary comparison after skb_gro_header For the overwhelming majority of cases, skb_gro_header's return value cannot be NULL. Yet we must check it because of its current form. This patch splits it up into multiple functions in order to avoid this. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 23 ++++++++++++++--------- net/core/dev.c | 17 ++++++++++++----- net/ipv4/af_inet.c | 13 ++++++++++--- net/ipv4/tcp.c | 22 ++++++++++++++++------ net/ipv6/af_inet6.c | 13 ++++++++++--- 5 files changed, 62 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e44a049be0..371ece521e5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1132,18 +1132,23 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) { - unsigned int offset = skb_gro_offset(skb); + return NAPI_GRO_CB(skb)->frag0 + offset; +} - hlen += offset; - if (NAPI_GRO_CB(skb)->frag0_len < hlen) { - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; - } +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} - return NAPI_GRO_CB(skb)->frag0 + offset; +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; } static inline void *skb_gro_mac_header(struct sk_buff *skb) diff --git a/net/core/dev.c b/net/core/dev.c index b1722a2d1fb..cd29e613bc5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2590,17 +2590,24 @@ struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; struct ethhdr *eth; + unsigned int hlen; + unsigned int off; napi->skb = NULL; skb_reset_mac_header(skb); skb_gro_reset_offset(skb); - eth = skb_gro_header(skb, sizeof(*eth)); - if (!eth) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*eth); + eth = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + eth = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } } skb_gro_pull(skb, sizeof(*eth)); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 170689681aa..644cc553531 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,13 +1246,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff **pp = NULL; struct sk_buff *p; struct iphdr *iph; + unsigned int hlen; + unsigned int off; int flush = 1; int proto; int id; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } proto = iph->protocol & (MAX_INET_PROTOS - 1); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68342d43189..c3dcec5efea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2518,20 +2518,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int thlen; unsigned int flags; unsigned int mss = 1; + unsigned int hlen; + unsigned int off; int flush = 1; int i; - th = skb_gro_header(skb, sizeof(*th)); - if (unlikely(!th)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; - th = skb_gro_header(skb, thlen); - if (unlikely(!th)) - goto out; + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } skb_gro_pull(skb, thlen); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 61f55386a23..b6215be0963 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -817,13 +817,20 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *p; struct ipv6hdr *iph; unsigned int nlen; + unsigned int hlen; + unsigned int off; int flush = 1; int proto; __wsum csum; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); -- cgit v1.2.3-70-g09d2 From cbf806dd9302f3ff27ba496dae474b9da6b58873 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 May 2009 06:22:58 -0700 Subject: Input: ucb1400 - move static function from header into core it's a little too large for static line. The ts is currently the only mainline user but Marek Vasut claims that there is a battery driver in an ARM tree which also needs this function. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dmitry Torokhov --- drivers/mfd/ucb1400_core.c | 20 ++++++++++++++++++++ include/linux/ucb1400.h | 23 ++++------------------- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c index 178159e264c..78c2135c5de 100644 --- a/drivers/mfd/ucb1400_core.c +++ b/drivers/mfd/ucb1400_core.c @@ -23,6 +23,26 @@ #include #include +unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, + int adcsync) +{ + unsigned int val; + + if (adcsync) + adc_channel |= UCB_ADC_SYNC_ENA; + + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel); + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel | + UCB_ADC_START); + + while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA)) + & UCB_ADC_DAT_VALID)) + schedule_timeout_uninterruptible(1); + + return val & UCB_ADC_DAT_MASK; +} +EXPORT_SYMBOL_GPL(ucb1400_adc_read); + static int ucb1400_core_probe(struct device *dev) { int err; diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index 970473bf8d5..ed889f4168f 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -134,28 +134,13 @@ static inline void ucb1400_adc_enable(struct snd_ac97 *ac97) ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA); } -static unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, - int adcsync) -{ - unsigned int val; - - if (adcsync) - adc_channel |= UCB_ADC_SYNC_ENA; - - ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel); - ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel | - UCB_ADC_START); - - while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA)) - & UCB_ADC_DAT_VALID)) - schedule_timeout_uninterruptible(1); - - return val & UCB_ADC_DAT_MASK; -} - static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) { ucb1400_reg_write(ac97, UCB_ADC_CR, 0); } + +unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, + int adcsync); + #endif -- cgit v1.2.3-70-g09d2 From a17c859849402315613a0015ac8fbf101acf0cc1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 27 May 2009 17:50:35 +0200 Subject: netfilter: conntrack: add support for DCCP handshake sequence to ctnetlink This patch adds CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ that exposes the u64 handshake sequence number to user-space. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netlink.h | 9 +++++++++ net/netfilter/nf_conntrack_proto_dccp.c | 7 +++++++ 3 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 1a865e48b8e..ed4ef8d0b11 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -101,6 +101,7 @@ enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, CTA_PROTOINFO_DCCP_ROLE, + CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) diff --git a/include/net/netlink.h b/include/net/netlink.h index eddb50289d6..007bdb07dab 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -939,6 +939,15 @@ static inline u64 nla_get_u64(const struct nlattr *nla) return tmp; } +/** + * nla_get_be64 - return payload of __be64 attribute + * @nla: __be64 netlink attribute + */ +static inline __be64 nla_get_be64(const struct nlattr *nla) +{ + return *(__be64 *) nla_data(nla); +} + /** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8e757dd5339..11801c43c8c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -635,6 +635,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); + NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, + cpu_to_be64(ct->proto.dccp.handshake_seq)); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -647,6 +649,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -679,6 +682,10 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; } + if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { + ct->proto.dccp.handshake_seq = + be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); + } write_unlock_bh(&dccp_lock); return 0; } -- cgit v1.2.3-70-g09d2 From fca4217c5bab31019b5247e977673c9fcc385f6b Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Wed, 1 Apr 2009 07:28:13 +1100 Subject: knfsd: reply cache cleanups Make REQHASH() an inline function. Rename hash_list to cache_hash. Fix an obsolete comment. Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 29 +++++++++++++++++++---------- include/linux/nfsd/cache.h | 3 +-- 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5bfc2ac60d5..6f0aa4989c6 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -29,15 +29,24 @@ */ #define CACHESIZE 1024 #define HASHSIZE 64 -#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1)) -static struct hlist_head * hash_list; +static struct hlist_head * cache_hash; static struct list_head lru_head; static int cache_disabled = 1; +/* + * Calculate the hash index from an XID. + */ +static inline u32 request_hash(u32 xid) +{ + u32 h = xid; + h ^= (xid >> 24); + return h & (HASHSIZE-1); +} + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -/* +/* * locking for the reply cache: * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. @@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void) i--; } - hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); - if (!hash_list) + cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + if (!cache_hash) goto out_nomem; cache_disabled = 0; @@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void) cache_disabled = 1; - kfree (hash_list); - hash_list = NULL; + kfree (cache_hash); + cache_hash = NULL; } /* @@ -108,7 +117,7 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } /* @@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &hash_list[REQHASH(xid)]; + rh = &cache_hash[request_hash(xid)]; hlist_for_each_entry(rp, hn, rh, c_hash) { if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && @@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; - + /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { rp->c_state = RC_UNUSED; diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 5bccaab8105..3a3f58934f5 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -14,8 +14,7 @@ #include /* - * Representation of a reply cache entry. The first two members *must* - * be hash_next and hash_prev. + * Representation of a reply cache entry. */ struct svc_cacherep { struct hlist_node c_hash; -- cgit v1.2.3-70-g09d2 From ab6bf42e2339580b5d87746d0ff4da4b1578b03e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:38:34 -0700 Subject: mlx4_core: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the amount of MTT segments that it can control. As a result, the size of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/mlx4/main.c | 14 ++++++++++++-- drivers/net/mlx4/mr.c | 6 +++--- drivers/net/mlx4/profile.c | 2 +- include/linux/mlx4/device.h | 1 + 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 30bea968969..018348c0119 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -100,6 +100,10 @@ module_param_named(use_prio, use_prio, bool, 0444); MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " "(0/1, default 0)"); +static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -203,12 +207,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.mtts_per_seg = 1 << log_mtts_per_seg; dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts, - MLX4_MTT_ENTRY_PER_SEG); + dev->caps.mtts_per_seg); dev->caps.reserved_mrws = dev_cap->reserved_mrws; dev->caps.reserved_uars = dev_cap->reserved_uars; dev->caps.reserved_pds = dev_cap->reserved_pds; - dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; @@ -1304,6 +1309,11 @@ static int __init mlx4_verify_params(void) return -1; } + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); + return -1; + } + return 0; } diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 0caf74cae8b..3b8973d1993 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -209,7 +209,7 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, } else mtt->page_shift = page_shift; - for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1) + for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1) ++mtt->order; mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order); @@ -350,7 +350,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | MLX4_MPT_PD_FLAG_RAE); mpt_entry->mtt_sz = cpu_to_be32((1 << mr->mtt.order) * - MLX4_MTT_ENTRY_PER_SEG); + dev->caps.mtts_per_seg); } else { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); } @@ -391,7 +391,7 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64))) return -EINVAL; - if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1)) + if (start_index & (dev->caps.mtts_per_seg - 1)) return -EINVAL; mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg + diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index cebdf3243ca..bd22df95adf 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz; profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; - profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE; profile[MLX4_RES_QP].num = request->num_qp; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3aff8a6a389..ce7cc6c7bcb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -210,6 +210,7 @@ struct mlx4_caps { int num_comp_vectors; int num_mpts; int num_mtt_segs; + int mtts_per_seg; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; -- cgit v1.2.3-70-g09d2 From 1ce8e7b57b3a4527ef83da1c5c7bd8a6b9d87b56 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2009 04:42:37 +0000 Subject: net: ALIGN/PTR_ALIGN cleanup in alloc_netdev_mq()/netdev_priv() Use ALIGN() and PTR_ALIGN() macros instead of handcoding them. Get rid of NETDEV_ALIGN_CONST ugly define Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +---- net/core/dev.c | 9 ++++----- net/mac80211/main.c | 8 ++------ 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 371ece521e5..14efce33c00 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -905,7 +905,6 @@ struct net_device #define to_net_dev(d) container_of(d, struct net_device, dev) #define NETDEV_ALIGN 32 -#define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, @@ -976,9 +975,7 @@ static inline bool netdev_uses_trailer_tags(struct net_device *dev) */ static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); + return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); } /* Set the sysfs physical device reference for the network logical device diff --git a/net/core/dev.c b/net/core/dev.c index ed4550fd9ec..32ceee17896 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4988,18 +4988,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, struct netdev_queue *tx; struct net_device *dev; size_t alloc_size; - void *p; + struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ - alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); alloc_size += sizeof_priv; } /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN_CONST; + alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { @@ -5014,8 +5014,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, goto free_p; } - dev = (struct net_device *) - (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; if (dev_addr_init(dev)) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6b7e92eaab4..e37770ced53 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -735,9 +735,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, * +-------------------------+ * */ - priv_size = ((sizeof(struct ieee80211_local) + - NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST) + - priv_data_len; + priv_size = ALIGN(sizeof(*local), NETDEV_ALIGN) + priv_data_len; wiphy = wiphy_new(&mac80211_config_ops, priv_size); @@ -754,9 +752,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.wiphy = wiphy; - local->hw.priv = (char *)local + - ((sizeof(struct ieee80211_local) + - NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); BUG_ON(!ops->tx); BUG_ON(!ops->start); -- cgit v1.2.3-70-g09d2 From 385a154cac8763284f65cdfa25f6796c9eb1ca21 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 27 May 2009 15:48:07 -0700 Subject: net: correct a comment for the final #endif Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 14efce33c00..8e03b06e638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1975,4 +1975,4 @@ static inline u32 dev_ethtool_get_flags(struct net_device *dev) } #endif /* __KERNEL__ */ -#endif /* _LINUX_DEV_H */ +#endif /* _LINUX_NETDEVICE_H */ -- cgit v1.2.3-70-g09d2 From f2aebaee653a35b01c3665de2cbb1e31456b8ea8 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Wed, 27 May 2009 21:36:02 +0800 Subject: ftrace: don't convert function's local variable name in macro "call" is an argument of macro, but it is also used as a local variable name of function in macro. We should keep this local variable name distinct from any CPP macro parameter name if both are in the same macro scope, although it hasn't caused any problem yet. [ Impact: robustify macro ] Signed-off-by: Zhao Lei Acked-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 87fc227c6fb..b4ec83ae711 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -397,19 +397,19 @@ static void ftrace_profile_##call(proto) \ perf_tpcounter_event(event_##call.id); \ } \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ { \ int ret = 0; \ \ - if (!atomic_inc_return(&call->profile_count)) \ + if (!atomic_inc_return(&event_call->profile_count)) \ ret = register_trace_##call(ftrace_profile_##call); \ \ return ret; \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ { \ - if (atomic_add_negative(-1, &call->profile_count)) \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ unregister_trace_##call(ftrace_profile_##call); \ } @@ -433,9 +433,9 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ #define __array(type, item, len) #undef __string -#define __string(item, src) \ - __str_offsets.item = __str_size + \ - offsetof(typeof(*entry), __str_data); \ +#define __string(item, src) \ + __str_offsets.item = __str_size + \ + offsetof(typeof(*entry), __str_data); \ __str_size += strlen(src) + 1; #undef __assign_str @@ -451,8 +451,8 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ - struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ - struct ftrace_event_call *call = &event_##call; \ + struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ + struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ @@ -473,7 +473,7 @@ static void ftrace_raw_event_##call(proto) \ \ assign; \ \ - if (!filter_current_check_discard(call, entry, event)) \ + if (!filter_current_check_discard(event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -- cgit v1.2.3-70-g09d2 From b9417f84e17b93a6976a8a88b38bf9567975cb38 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 27 Apr 2009 16:33:36 -0600 Subject: ACPI: use LNXCPU, not ACPI_CPU, for Linux-specific processor _HID ACPI_PROCESSOR_OBJECT_HID is a synthetic _HID that Linux generates for "Processor" definitions. Unlike "Device" definitions, "Processor" definitions do not have a _HID in the namespace, so we generate a fake _HID. By convention, all these fake _HIDs begin with "LNX". This does change the user-visible _HID for "Processor" objects -- previously, we used "ACPI_CPU" and this changes that to "LNXCPU", which starts with "LNX" as do all the other made-up _HIDs. This change is visible in processor filenames and "hid" files under /sys/devices/LNXSYSTM:00/. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/acpi/acpi_drivers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 0352c8f0b05..c9922b36200 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -57,7 +57,7 @@ */ #define ACPI_POWER_HID "LNXPOWER" -#define ACPI_PROCESSOR_OBJECT_HID "ACPI_CPU" +#define ACPI_PROCESSOR_OBJECT_HID "LNXCPU" #define ACPI_PROCESSOR_HID "ACPI0007" #define ACPI_SYSTEM_HID "LNXSYSTM" #define ACPI_THERMAL_HID "LNXTHERM" -- cgit v1.2.3-70-g09d2 From 9eccbc2f67efd0d19c47f40182abf2965c287add Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 27 Apr 2009 16:33:46 -0600 Subject: ACPI: processor: move device _HID into driver The ACPI0007 _HID used for processor "Device" objects in the namespace is not needed outside the processor driver, so move it there. Also, the #define is only used once, so just remove it and hard-code "ACPI0007". Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 2 +- include/acpi/acpi_drivers.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index cf627d64cde..cabff4cb21f 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -89,7 +89,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr); static const struct acpi_device_id processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, 0}, - {ACPI_PROCESSOR_HID, 0}, + {"ACPI0007", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, processor_device_ids); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index c9922b36200..5e8ed3a78cd 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -58,7 +58,6 @@ #define ACPI_POWER_HID "LNXPOWER" #define ACPI_PROCESSOR_OBJECT_HID "LNXCPU" -#define ACPI_PROCESSOR_HID "ACPI0007" #define ACPI_SYSTEM_HID "LNXSYSTM" #define ACPI_THERMAL_HID "LNXTHERM" #define ACPI_BUTTON_HID_POWERF "LNXPWRBN" -- cgit v1.2.3-70-g09d2 From d3e78ee3d015dac1794433abb6403b6fc8e70e10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2009 11:41:50 +0200 Subject: perf_counter: Fix perf_counter_init_task() on !CONFIG_PERF_COUNTERS Pointed out by compiler warnings: tip/include/linux/perf_counter.h:644: warning: no return statement in function returning non-void Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b16ed37b74..a65ddc58051 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -641,7 +641,7 @@ perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { } +static inline int perf_counter_init_task(struct task_struct *child) { return 0; } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } -- cgit v1.2.3-70-g09d2 From c93f7669098eb97c5376e5396e3dfb734c17df4f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 28 May 2009 22:18:17 +1000 Subject: perf_counter: Fix race in attaching counters to tasks and exiting Commit 564c2b21 ("perf_counter: Optimize context switch between identical inherited contexts") introduced a race where it is possible that a counter being attached to a task could get attached to the wrong task, if the task is one that has inherited its context from another task via fork. This happens because the optimized context switch could switch the context to another task after find_get_context has read task->perf_counter_ctxp. In fact, it's possible that the context could then get freed, if the other task then exits. This fixes the problem by protecting both the context switch and the critical code in find_get_context with spinlocks. The context switch locks the cxt->lock of both the outgoing and incoming contexts before swapping them. That means that once code such as find_get_context has obtained the spinlock for the context associated with a task, the context can't get swapped to another task. However, the context may have been swapped in the interval between reading task->perf_counter_ctxp and getting the lock, so it is necessary to check and retry. To make sure that none of the contexts being looked at in find_get_context can get freed, this changes the context freeing code to use RCU. Thus an rcu_read_lock() is sufficient to ensure that no contexts can get freed. This part of the patch is lifted from a patch posted by Peter Zijlstra. This also adds a check to make sure that we can't add a counter to a task that is exiting. There is also a race between perf_counter_exit_task and find_get_context; this solves the race by moving the get_ctx that was in perf_counter_alloc into the locked region in find_get_context, so that once find_get_context has got the context for a task, it won't get freed even if the task calls perf_counter_exit_task. It doesn't matter if new top-level (non-inherited) counters get attached to the context after perf_counter_exit_task has detached the context from the task. They will just stay there and never get scheduled in until the counters' fds get closed, and then perf_release will remove them from the context and eventually free the context. With this, we are now doing the unclone in find_get_context rather than when a counter was added to or removed from a context (actually, we were missing the unclone_ctx() call when adding a counter to a context). We don't need to unclone when removing a counter from a context because we have no way to remove a counter from a cloned context. This also takes out the smp_wmb() in find_get_context, which Peter Zijlstra pointed out was unnecessary because the cmpxchg implies a full barrier anyway. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Mike Galbraith Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18974.33033.667187.273886@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +- kernel/perf_counter.c | 205 ++++++++++++++++++++++++++++++------------- 2 files changed, 148 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a65ddc58051..717bf3b59ba 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -541,8 +541,9 @@ struct perf_counter_context { * been cloned (inherited) from a common ancestor. */ struct perf_counter_context *parent_ctx; - u32 parent_gen; - u32 generation; + u64 parent_gen; + u64 generation; + struct rcu_head rcu_head; }; /** diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 367299f91aa..52e5a15321d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -103,12 +103,22 @@ static void get_ctx(struct perf_counter_context *ctx) atomic_inc(&ctx->refcount); } +static void free_ctx(struct rcu_head *head) +{ + struct perf_counter_context *ctx; + + ctx = container_of(head, struct perf_counter_context, rcu_head); + kfree(ctx); +} + static void put_ctx(struct perf_counter_context *ctx) { if (atomic_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) put_ctx(ctx->parent_ctx); - kfree(ctx); + if (ctx->task) + put_task_struct(ctx->task); + call_rcu(&ctx->rcu_head, free_ctx); } } @@ -211,22 +221,6 @@ group_sched_out(struct perf_counter *group_counter, cpuctx->exclusive = 0; } -/* - * Mark this context as not being a clone of another. - * Called when counters are added to or removed from this context. - * We also increment our generation number so that anything that - * was cloned from this context before this will not match anything - * cloned from this context after this. - */ -static void unclone_ctx(struct perf_counter_context *ctx) -{ - ++ctx->generation; - if (!ctx->parent_ctx) - return; - put_ctx(ctx->parent_ctx); - ctx->parent_ctx = NULL; -} - /* * Cross CPU call to remove a performance counter * @@ -281,13 +275,19 @@ static void __perf_counter_remove_from_context(void *info) * * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. + * + * If counter->ctx is a cloned context, callers must make sure that + * every task struct that counter->ctx->task could possibly point to + * remains valid. This is OK when called from perf_release since + * that only calls us on the top-level context, which can't be a clone. + * When called from perf_counter_exit_task, it's OK because the + * context has been detached from its task. */ static void perf_counter_remove_from_context(struct perf_counter *counter) { struct perf_counter_context *ctx = counter->ctx; struct task_struct *task = ctx->task; - unclone_ctx(ctx); if (!task) { /* * Per cpu counters are removed via an smp call and @@ -410,6 +410,16 @@ static void __perf_counter_disable(void *info) /* * Disable a counter. + * + * If counter->ctx is a cloned context, callers must make sure that + * every task struct that counter->ctx->task could possibly point to + * remains valid. This condition is satisifed when called through + * perf_counter_for_each_child or perf_counter_for_each because they + * hold the top-level counter's child_mutex, so any descendant that + * goes to exit will block in sync_child_counter. + * When called from perf_pending_counter it's OK because counter->ctx + * is the current context on this CPU and preemption is disabled, + * hence we can't get into perf_counter_task_sched_out for this context. */ static void perf_counter_disable(struct perf_counter *counter) { @@ -794,6 +804,12 @@ static void __perf_counter_enable(void *info) /* * Enable a counter. + * + * If counter->ctx is a cloned context, callers must make sure that + * every task struct that counter->ctx->task could possibly point to + * remains valid. This condition is satisfied when called through + * perf_counter_for_each_child or perf_counter_for_each as described + * for perf_counter_disable. */ static void perf_counter_enable(struct perf_counter *counter) { @@ -923,7 +939,9 @@ void perf_counter_task_sched_out(struct task_struct *task, struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = task->perf_counter_ctxp; struct perf_counter_context *next_ctx; + struct perf_counter_context *parent; struct pt_regs *regs; + int do_switch = 1; regs = task_pt_regs(task); perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); @@ -932,18 +950,39 @@ void perf_counter_task_sched_out(struct task_struct *task, return; update_context_time(ctx); + + rcu_read_lock(); + parent = rcu_dereference(ctx->parent_ctx); next_ctx = next->perf_counter_ctxp; - if (next_ctx && context_equiv(ctx, next_ctx)) { - task->perf_counter_ctxp = next_ctx; - next->perf_counter_ctxp = ctx; - ctx->task = next; - next_ctx->task = task; - return; + if (parent && next_ctx && + rcu_dereference(next_ctx->parent_ctx) == parent) { + /* + * Looks like the two contexts are clones, so we might be + * able to optimize the context switch. We lock both + * contexts and check that they are clones under the + * lock (including re-checking that neither has been + * uncloned in the meantime). It doesn't matter which + * order we take the locks because no other cpu could + * be trying to lock both of these tasks. + */ + spin_lock(&ctx->lock); + spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); + if (context_equiv(ctx, next_ctx)) { + task->perf_counter_ctxp = next_ctx; + next->perf_counter_ctxp = ctx; + ctx->task = next; + next_ctx->task = task; + do_switch = 0; + } + spin_unlock(&next_ctx->lock); + spin_unlock(&ctx->lock); } + rcu_read_unlock(); - __perf_counter_sched_out(ctx, cpuctx); - - cpuctx->task_ctx = NULL; + if (do_switch) { + __perf_counter_sched_out(ctx, cpuctx); + cpuctx->task_ctx = NULL; + } } static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) @@ -1215,18 +1254,13 @@ __perf_counter_init_context(struct perf_counter_context *ctx, ctx->task = task; } -static void put_context(struct perf_counter_context *ctx) -{ - if (ctx->task) - put_task_struct(ctx->task); -} - static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; - struct perf_counter_context *tctx; + struct perf_counter_context *parent_ctx; struct task_struct *task; + int err; /* * If cpu is not a wildcard then this is a percpu counter: @@ -1249,6 +1283,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = &cpuctx->ctx; + get_ctx(ctx); return ctx; } @@ -1265,37 +1300,79 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) if (!task) return ERR_PTR(-ESRCH); + /* + * Can't attach counters to a dying task. + */ + err = -ESRCH; + if (task->flags & PF_EXITING) + goto errout; + /* Reuse ptrace permission checks for now. */ - if (!ptrace_may_access(task, PTRACE_MODE_READ)) { - put_task_struct(task); - return ERR_PTR(-EACCES); + err = -EACCES; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto errout; + + retry_lock: + rcu_read_lock(); + retry: + ctx = rcu_dereference(task->perf_counter_ctxp); + if (ctx) { + /* + * If this context is a clone of another, it might + * get swapped for another underneath us by + * perf_counter_task_sched_out, though the + * rcu_read_lock() protects us from any context + * getting freed. Lock the context and check if it + * got swapped before we could get the lock, and retry + * if so. If we locked the right context, then it + * can't get swapped on us any more and we can + * unclone it if necessary. + * Once it's not a clone things will be stable. + */ + spin_lock_irq(&ctx->lock); + if (ctx != rcu_dereference(task->perf_counter_ctxp)) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + parent_ctx = ctx->parent_ctx; + if (parent_ctx) { + put_ctx(parent_ctx); + ctx->parent_ctx = NULL; /* no longer a clone */ + } + ++ctx->generation; + /* + * Get an extra reference before dropping the lock so that + * this context won't get freed if the task exits. + */ + get_ctx(ctx); + spin_unlock_irq(&ctx->lock); } + rcu_read_unlock(); - ctx = task->perf_counter_ctxp; if (!ctx) { ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - if (!ctx) { - put_task_struct(task); - return ERR_PTR(-ENOMEM); - } + err = -ENOMEM; + if (!ctx) + goto errout; __perf_counter_init_context(ctx, task); - /* - * Make sure other cpus see correct values for *ctx - * once task->perf_counter_ctxp is visible to them. - */ - smp_wmb(); - tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx); - if (tctx) { + get_ctx(ctx); + if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { /* * We raced with some other task; use * the context they set. */ kfree(ctx); - ctx = tctx; + goto retry_lock; } + get_task_struct(task); } + put_task_struct(task); return ctx; + + errout: + put_task_struct(task); + return ERR_PTR(err); } static void free_counter_rcu(struct rcu_head *head) @@ -1303,7 +1380,6 @@ static void free_counter_rcu(struct rcu_head *head) struct perf_counter *counter; counter = container_of(head, struct perf_counter, rcu_head); - put_ctx(counter->ctx); kfree(counter); } @@ -1324,6 +1400,7 @@ static void free_counter(struct perf_counter *counter) if (counter->destroy) counter->destroy(counter); + put_ctx(counter->ctx); call_rcu(&counter->rcu_head, free_counter_rcu); } @@ -1347,7 +1424,6 @@ static int perf_release(struct inode *inode, struct file *file) put_task_struct(counter->owner); free_counter(counter); - put_context(ctx); return 0; } @@ -1437,6 +1513,12 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter, mutex_unlock(&ctx->mutex); } +/* + * Holding the top-level counter's child_mutex means that any + * descendant process that has inherited this counter will block + * in sync_child_counter if it goes to exit, thus satisfying the + * task existence requirements of perf_counter_enable/disable. + */ static void perf_counter_for_each_child(struct perf_counter *counter, void (*func)(struct perf_counter *)) { @@ -3124,8 +3206,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->ctx = ctx; counter->oncpu = -1; - get_ctx(ctx); - counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) counter->state = PERF_COUNTER_STATE_OFF; @@ -3290,7 +3370,7 @@ err_free_put_context: kfree(counter); err_put_context: - put_context(ctx); + put_ctx(ctx); goto out_fput; } @@ -3322,6 +3402,7 @@ inherit_counter(struct perf_counter *parent_counter, group_leader, GFP_KERNEL); if (IS_ERR(child_counter)) return child_counter; + get_ctx(child_ctx); /* * Make the child state follow the state of the parent counter, @@ -3439,11 +3520,6 @@ __perf_counter_exit_task(struct task_struct *child, /* * When a child task exits, feed back counter values to parent counters. - * - * Note: we may be running in child context, but the PID is not hashed - * anymore so new counters will not be added. - * (XXX not sure that is true when we get called from flush_old_exec. - * -- paulus) */ void perf_counter_exit_task(struct task_struct *child) { @@ -3458,7 +3534,15 @@ void perf_counter_exit_task(struct task_struct *child) local_irq_save(flags); __perf_counter_task_sched_out(child_ctx); + + /* + * Take the context lock here so that if find_get_context is + * reading child->perf_counter_ctxp, we wait until it has + * incremented the context's refcount before we do put_ctx below. + */ + spin_lock(&child_ctx->lock); child->perf_counter_ctxp = NULL; + spin_unlock(&child_ctx->lock); local_irq_restore(flags); mutex_lock(&child_ctx->mutex); @@ -3513,6 +3597,7 @@ int perf_counter_init_task(struct task_struct *child) __perf_counter_init_context(child_ctx, child); child->perf_counter_ctxp = child_ctx; + get_task_struct(child); /* * Lock the parent list. No need to lock the child - not PID -- cgit v1.2.3-70-g09d2 From 7f0333eb2f98bbfece4fbfe21076d0a3e49f0bb0 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Wed, 13 May 2009 06:20:29 -0700 Subject: wimax: Add netlink interface to get device state wimax connection manager / daemon has to know what is current state of the device. Previously it was only possible to get notification whet state has changed. Note: By mistake, the new generic netlink's number for WIMAX_GNL_OP_STATE_GET was declared inserting into the existing list of API calls, not appending; thus, it'd break existing API. Fixed by Inaky Perez-Gonzalez by moving to the tail, where we add to the interface, not modify the interface. Thanks to Stephen Hemminger for catching this. Signed-off-by: Paulius Zaleckas --- include/linux/wimax.h | 5 +++ net/wimax/Makefile | 1 + net/wimax/debug-levels.h | 1 + net/wimax/debugfs.c | 1 + net/wimax/op-state-get.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++ net/wimax/stack.c | 5 ++- 6 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 net/wimax/op-state-get.c (limited to 'include') diff --git a/include/linux/wimax.h b/include/linux/wimax.h index c89de7f4e5b..2f7a6b77d6b 100644 --- a/include/linux/wimax.h +++ b/include/linux/wimax.h @@ -78,6 +78,7 @@ enum { WIMAX_GNL_OP_RFKILL, /* Run wimax_rfkill() */ WIMAX_GNL_OP_RESET, /* Run wimax_rfkill() */ WIMAX_GNL_RE_STATE_CHANGE, /* Report: status change */ + WIMAX_GNL_OP_STATE_GET, /* Request for current state */ }; @@ -113,6 +114,10 @@ enum { WIMAX_GNL_RESET_IFIDX = 1, }; +/* Atributes for wimax_state_get() */ +enum { + WIMAX_GNL_STGET_IFIDX = 1, +}; /* * Attributes for the Report State Change diff --git a/net/wimax/Makefile b/net/wimax/Makefile index 5b80b941c2c..8f1510d0cc2 100644 --- a/net/wimax/Makefile +++ b/net/wimax/Makefile @@ -6,6 +6,7 @@ wimax-y := \ op-msg.o \ op-reset.o \ op-rfkill.o \ + op-state-get.o \ stack.o wimax-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h index 1c29123a3aa..0975adba6b7 100644 --- a/net/wimax/debug-levels.h +++ b/net/wimax/debug-levels.h @@ -36,6 +36,7 @@ enum d_module { D_SUBMODULE_DECLARE(op_msg), D_SUBMODULE_DECLARE(op_reset), D_SUBMODULE_DECLARE(op_rfkill), + D_SUBMODULE_DECLARE(op_state_get), D_SUBMODULE_DECLARE(stack), }; diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c index 94d216a4640..6c9bedb7431 100644 --- a/net/wimax/debugfs.c +++ b/net/wimax/debugfs.c @@ -61,6 +61,7 @@ int wimax_debugfs_add(struct wimax_dev *wimax_dev) __debugfs_register("wimax_dl_", op_msg, dentry); __debugfs_register("wimax_dl_", op_reset, dentry); __debugfs_register("wimax_dl_", op_rfkill, dentry); + __debugfs_register("wimax_dl_", op_state_get, dentry); __debugfs_register("wimax_dl_", stack, dentry); result = 0; out: diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c new file mode 100644 index 00000000000..a76b8fcb056 --- /dev/null +++ b/net/wimax/op-state-get.c @@ -0,0 +1,86 @@ +/* + * Linux WiMAX + * Implement and export a method for getting a WiMAX device current state + * + * Copyright (C) 2009 Paulius Zaleckas + * + * Based on previous WiMAX core work by: + * Copyright (C) 2008 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include +#include +#include +#include +#include "wimax-internal.h" + +#define D_SUBMODULE op_state_get +#include "debug-levels.h" + + +static const +struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { + [WIMAX_GNL_STGET_IFIDX] = { + .type = NLA_U32, + }, +}; + + +/* + * Exporting to user space over generic netlink + * + * Parse the state get command from user space, return a combination + * value that describe the current state. + * + * No attributes. + */ +static +int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info) +{ + int result, ifindex; + struct wimax_dev *wimax_dev; + struct device *dev; + + d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); + result = -ENODEV; + if (info->attrs[WIMAX_GNL_STGET_IFIDX] == NULL) { + printk(KERN_ERR "WIMAX_GNL_OP_STATE_GET: can't find IFIDX " + "attribute\n"); + goto error_no_wimax_dev; + } + ifindex = nla_get_u32(info->attrs[WIMAX_GNL_STGET_IFIDX]); + wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); + if (wimax_dev == NULL) + goto error_no_wimax_dev; + dev = wimax_dev_to_dev(wimax_dev); + /* Execute the operation and send the result back to user space */ + result = wimax_state_get(wimax_dev); + dev_put(wimax_dev->net_dev); +error_no_wimax_dev: + d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result); + return result; +} + + +struct genl_ops wimax_gnl_state_get = { + .cmd = WIMAX_GNL_OP_STATE_GET, + .flags = GENL_ADMIN_PERM, + .policy = wimax_gnl_state_get_policy, + .doit = wimax_gnl_doit_state_get, + .dumpit = NULL, +}; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 933e1422b09..79fb7d7c640 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -402,13 +402,15 @@ EXPORT_SYMBOL_GPL(wimax_dev_init); extern struct genl_ops wimax_gnl_msg_from_user, wimax_gnl_reset, - wimax_gnl_rfkill; + wimax_gnl_rfkill, + wimax_gnl_state_get; static struct genl_ops *wimax_gnl_ops[] = { &wimax_gnl_msg_from_user, &wimax_gnl_reset, &wimax_gnl_rfkill, + &wimax_gnl_state_get, }; @@ -533,6 +535,7 @@ struct d_level D_LEVEL[] = { D_SUBMODULE_DEFINE(op_msg), D_SUBMODULE_DEFINE(op_reset), D_SUBMODULE_DEFINE(op_rfkill), + D_SUBMODULE_DEFINE(op_state_get), D_SUBMODULE_DEFINE(stack), }; size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); -- cgit v1.2.3-70-g09d2 From 7481806dcfd07e9a636155554f6f4b4fbd976381 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Fri, 22 May 2009 00:41:11 -0700 Subject: wimax: a new API call was added, increment minor protocol version number As the 'state_get' API call was added, we need to increase the minor protocol version number so applications that depend on the can check it's presence. Signed-off-by: Inaky Perez-Gonzalez --- include/linux/wimax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wimax.h b/include/linux/wimax.h index 2f7a6b77d6b..4fdcc563551 100644 --- a/include/linux/wimax.h +++ b/include/linux/wimax.h @@ -59,7 +59,7 @@ enum { * M - Major: change if removing or modifying an existing call. * m - minor: change when adding a new call */ - WIMAX_GNL_VERSION = 00, + WIMAX_GNL_VERSION = 01, /* Generic NetLink attributes */ WIMAX_GNL_ATTR_INVALID = 0x00, WIMAX_GNL_ATTR_MAX = 10, -- cgit v1.2.3-70-g09d2 From 5d4e039b2cb1ca4de9774344ea7b61ad7fa1b0a1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 28 May 2009 01:05:00 +0000 Subject: bonding: allow bond in mode balance-alb to work properly in bridge -try4.3 [PATCH net-next] bonding: allow bond in mode balance-alb to work properly in bridge -try4.3 (updated) changes v4.2 -> v4.3 - memcpy the address always, not just in case it differs from master->dev_addr - compare_ether_addr_64bits() is not used so there is no direct need to make new header file (I think it would be good to have bond stuff in separate file anyway). changes v4.1 -> v4.2 - use skb->pkt_type == PACKET_HOST compare rather then comparing skb dest addr against skb->dev->dev_addr The problem is described in following bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=487763 Basically here's what's going on. In every mode, bonding interface uses the same mac address for all enslaved devices (except fail_over_mac). Only balance-alb will simultaneously use multiple MAC addresses across different slaves. When you put this kind of bond device into a bridge it will only add one of mac adresses into a hash list of mac addresses, say X. This mac address is marked as local. But this bonding interface also has mac address Y. Now then packet arrives with destination address Y, this address is not marked as local and the packed looks like it needs to be forwarded. This packet is then lost which is wrong. Notice that interfaces can be added and removed from bond while it is in bridge. *** When the multiple addresses for bridge port approach failed to solve this issue due to STP I started to think other way to solve this. I returned to previous solution but tweaked one. This patch solves the situation in the bonding without touching bridge code. For every incoming frame to bonding the destination address is compared to current address of the slave device from which tha packet came. If these two match destination address is replaced by mac address of the master. This address is known by bridge so it is delivered properly. Note that the comparsion is not made directly, it's used skb->pkt_type == PACKET_HOST instead. This is "set" previously in eth_type_trans(). I experimentally tried that this works as good as searching through the slave list (v4 of this patch). Jirka Signed-off-by: Jiri Pirko Signed-off-by: Eric Dumazet Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e03b06e638..1eaf5ae14fe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1917,6 +1917,16 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, + struct net_device *master) +{ + if (skb->pkt_type == PACKET_HOST) { + u16 *dest = (u16 *) eth_hdr(skb)->h_dest; + + memcpy(dest, master->dev_addr, ETH_ALEN); + } +} + /* On bonding slaves other than the currently active slave, suppress * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. @@ -1930,6 +1940,14 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) if (master->priv_flags & IFF_MASTER_ARPMON) dev->last_rx = jiffies; + if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { + /* Do address unmangle. The local destination address + * will be always the one master has. Provides the right + * functionality in a bridge. + */ + skb_bond_set_mac_by_master(skb, master); + } + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && skb->protocol == __cpu_to_be16(ETH_P_ARP)) -- cgit v1.2.3-70-g09d2 From 8bea869c5e56234990e6bad92a543437115bfc18 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 27 Apr 2009 09:44:40 +0200 Subject: ALSA: PCM midlevel: improve fifo_size handling Move the fifo_size assignment to hw->ioctl callback to allow lowlevel drivers overwrite the default behaviour. fifo_size is in frames not bytes as specified in asound.h and alsa-lib's documentation, but most hardware have fixed byte based FIFOs. Introduce internal SNDRV_PCM_INFO_FIFO_IN_FRAMES. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/asound.h | 1 + include/sound/pcm.h | 1 + sound/core/pcm_lib.c | 19 +++++++++++++++++++ sound/core/pcm_native.c | 15 ++++++++++++--- 4 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/asound.h b/include/sound/asound.h index 6add80fc251..82aed3f4753 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -255,6 +255,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HALF_DUPLEX 0x00100000 /* only half duplex */ #define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */ #define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm support some kind of sync go */ +#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ typedef int __bitwise snd_pcm_state_t; #define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 267effddb07..8a69b5c1e1c 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -98,6 +98,7 @@ struct snd_pcm_ops { #define SNDRV_PCM_IOCTL1_INFO 1 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 #define SNDRV_PCM_IOCTL1_GSTATE 3 +#define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 #define SNDRV_PCM_TRIGGER_STOP 0 #define SNDRV_PCM_TRIGGER_START 1 diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index d659995ac3a..adc2b0bd113 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1524,6 +1524,23 @@ static int snd_pcm_lib_ioctl_channel_info(struct snd_pcm_substream *substream, return 0; } +static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream, + void *arg) +{ + struct snd_pcm_hw_params *params = arg; + snd_pcm_format_t format; + int channels, width; + + params->fifo_size = substream->runtime->hw.fifo_size; + if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) { + format = params_format(params); + channels = params_channels(params); + width = snd_pcm_format_physical_width(format); + params->fifo_size /= width * channels; + } + return 0; +} + /** * snd_pcm_lib_ioctl - a generic PCM ioctl callback * @substream: the pcm substream instance @@ -1545,6 +1562,8 @@ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, return snd_pcm_lib_ioctl_reset(substream, arg); case SNDRV_PCM_IOCTL1_CHANNEL_INFO: return snd_pcm_lib_ioctl_channel_info(substream, arg); + case SNDRV_PCM_IOCTL1_FIFO_SIZE: + return snd_pcm_lib_ioctl_fifo_size(substream, arg); } return -ENXIO; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 45dc53fcfa2..84da3ba17c8 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -312,9 +312,18 @@ int snd_pcm_hw_refine(struct snd_pcm_substream *substream, hw = &substream->runtime->hw; if (!params->info) - params->info = hw->info; - if (!params->fifo_size) - params->fifo_size = hw->fifo_size; + params->info = hw->info & ~SNDRV_PCM_INFO_FIFO_IN_FRAMES; + if (!params->fifo_size) { + if (snd_mask_min(¶ms->masks[SNDRV_PCM_HW_PARAM_FORMAT]) == + snd_mask_max(¶ms->masks[SNDRV_PCM_HW_PARAM_FORMAT]) && + snd_mask_min(¶ms->masks[SNDRV_PCM_HW_PARAM_CHANNELS]) == + snd_mask_max(¶ms->masks[SNDRV_PCM_HW_PARAM_CHANNELS])) { + changed = substream->ops->ioctl(substream, + SNDRV_PCM_IOCTL1_FIFO_SIZE, params); + if (params < 0) + return changed; + } + } params->rmask = 0; return 0; } -- cgit v1.2.3-70-g09d2 From 0dc54e9f33e2fbcea28356bc2c8c931cb307d3b3 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 8 Apr 2009 22:52:28 -0700 Subject: mtd: add MEMERASE64 ioctl for >4GiB devices New MEMERASE/MEMREADOOB/MEMWRITEOOB ioctls are needed in order to support 64-bit offsets into large NAND flash devices. Signed-off-by: Kevin Cernekee Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdchar.c | 29 +++++++++++++++++++++-------- fs/compat_ioctl.c | 1 + include/mtd/mtd-abi.h | 6 ++++++ 3 files changed, 28 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 763d3f0a1f4..ad4b8618977 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -417,6 +417,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, break; case MEMERASE: + case MEMERASE64: { struct erase_info *erase; @@ -427,20 +428,32 @@ static int mtd_ioctl(struct inode *inode, struct file *file, if (!erase) ret = -ENOMEM; else { - struct erase_info_user einfo; - wait_queue_head_t waitq; DECLARE_WAITQUEUE(wait, current); init_waitqueue_head(&waitq); - if (copy_from_user(&einfo, argp, - sizeof(struct erase_info_user))) { - kfree(erase); - return -EFAULT; + if (cmd == MEMERASE64) { + struct erase_info_user64 einfo64; + + if (copy_from_user(&einfo64, argp, + sizeof(struct erase_info_user64))) { + kfree(erase); + return -EFAULT; + } + erase->addr = einfo64.start; + erase->len = einfo64.length; + } else { + struct erase_info_user einfo32; + + if (copy_from_user(&einfo32, argp, + sizeof(struct erase_info_user))) { + kfree(erase); + return -EFAULT; + } + erase->addr = einfo32.start; + erase->len = einfo32.length; } - erase->addr = einfo.start; - erase->len = einfo.length; erase->mtd = mtd; erase->callback = mtdchar_erase_callback; erase->priv = (unsigned long)&waitq; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index b83f6bcfa51..c603ca2c223 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2441,6 +2441,7 @@ COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) COMPATIBLE_IOCTL(MEMGETREGIONINFO) COMPATIBLE_IOCTL(MEMGETBADBLOCK) COMPATIBLE_IOCTL(MEMSETBADBLOCK) +COMPATIBLE_IOCTL(MEMERASE64) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index b6595b3c68b..2e32be1e3a1 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -12,6 +12,11 @@ struct erase_info_user { __u32 length; }; +struct erase_info_user64 { + __u64 start; + __u64 length; +}; + struct mtd_oob_buf { __u32 start; __u32 length; @@ -95,6 +100,7 @@ struct otp_info { #define ECCGETLAYOUT _IOR('M', 17, struct nand_ecclayout) #define ECCGETSTATS _IOR('M', 18, struct mtd_ecc_stats) #define MTDFILEMODE _IO('M', 19) +#define MEMERASE64 _IOW('M', 20, struct erase_info_user64) /* * Obsolete legacy interface. Keep it in order not to break userspace -- cgit v1.2.3-70-g09d2 From bbbee90829304d156c12b171c0ac7e6e1aba8b90 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2009 14:25:58 +0200 Subject: perf_counter: Ammend cleanup in fork() fail When fork() fails we cannot use perf_counter_exit_task() since that assumes to operate on current. Write a new helper that cleans up unused/clean contexts. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/fork.c | 2 +- kernel/perf_counter.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 717bf3b59ba..519a41bba24 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -579,6 +579,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); +extern void perf_counter_free_task(struct task_struct *task); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); @@ -644,6 +645,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline int perf_counter_init_task(struct task_struct *child) { return 0; } static inline void perf_counter_exit_task(struct task_struct *child) { } +static inline void perf_counter_free_task(struct task_struct *task) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_disable(void) { } diff --git a/kernel/fork.c b/kernel/fork.c index c07c3335cea..23bf757ed32 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1298,7 +1298,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_exit_task(p); + perf_counter_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0c000d305e0..79c3f26541d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3538,8 +3538,7 @@ static void sync_child_counter(struct perf_counter *child_counter, } static void -__perf_counter_exit_task(struct task_struct *child, - struct perf_counter *child_counter, +__perf_counter_exit_task(struct perf_counter *child_counter, struct perf_counter_context *child_ctx) { struct perf_counter *parent_counter; @@ -3605,7 +3604,7 @@ void perf_counter_exit_task(struct task_struct *child) again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) - __perf_counter_exit_task(child, child_counter, child_ctx); + __perf_counter_exit_task(child_counter, child_ctx); /* * If the last counter was a group counter, it will have appended all @@ -3620,6 +3619,44 @@ again: put_ctx(child_ctx); } +/* + * free an unexposed, unused context as created by inheritance by + * init_task below, used by fork() in case of fail. + */ +void perf_counter_free_task(struct task_struct *task) +{ + struct perf_counter_context *ctx = task->perf_counter_ctxp; + struct perf_counter *counter, *tmp; + + if (!ctx) + return; + + mutex_lock(&ctx->mutex); +again: + list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) { + struct perf_counter *parent = counter->parent; + + if (WARN_ON_ONCE(!parent)) + continue; + + mutex_lock(&parent->child_mutex); + list_del_init(&counter->child_list); + mutex_unlock(&parent->child_mutex); + + fput(parent->filp); + + list_del_counter(counter, ctx); + free_counter(counter); + } + + if (!list_empty(&ctx->counter_list)) + goto again; + + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + /* * Initialize the perf_counter context in task_struct */ -- cgit v1.2.3-70-g09d2 From aea7cea9fa9e39e71f95ad70b3daf98ba9972587 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 8 Apr 2009 22:53:49 -0700 Subject: mtd: add OOB ioctls for >4GiB devices Signed-off-by: Kevin Cernekee Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdchar.c | 28 ++++++++++++++++++++++++++++ fs/compat_ioctl.c | 2 ++ include/mtd/mtd-abi.h | 9 +++++++++ 3 files changed, 39 insertions(+) (limited to 'include') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 51bb0b09200..99d1fbc9501 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -607,6 +607,34 @@ static int mtd_ioctl(struct inode *inode, struct file *file, break; } + case MEMWRITEOOB64: + { + struct mtd_oob_buf64 buf; + struct mtd_oob_buf64 __user *buf_user = argp; + + if (copy_from_user(&buf, argp, sizeof(buf))) + ret = -EFAULT; + else + ret = mtd_do_writeoob(file, mtd, buf.start, buf.length, + (void __user *)(uintptr_t)buf.usr_ptr, + &buf_user->length); + break; + } + + case MEMREADOOB64: + { + struct mtd_oob_buf64 buf; + struct mtd_oob_buf64 __user *buf_user = argp; + + if (copy_from_user(&buf, argp, sizeof(buf))) + ret = -EFAULT; + else + ret = mtd_do_readoob(mtd, buf.start, buf.length, + (void __user *)(uintptr_t)buf.usr_ptr, + &buf_user->length); + break; + } + case MEMLOCK: { struct erase_info_user einfo; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 196397bff08..8da222eacba 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2411,6 +2411,8 @@ COMPATIBLE_IOCTL(ECCGETLAYOUT) COMPATIBLE_IOCTL(ECCGETSTATS) COMPATIBLE_IOCTL(MTDFILEMODE) COMPATIBLE_IOCTL(MEMERASE64) +COMPATIBLE_IOCTL(MEMREADOOB64) +COMPATIBLE_IOCTL(MEMWRITEOOB64) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index 2e32be1e3a1..be51ae2bd0f 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -23,6 +23,13 @@ struct mtd_oob_buf { unsigned char __user *ptr; }; +struct mtd_oob_buf64 { + __u64 start; + __u32 pad; + __u32 length; + __u64 usr_ptr; +}; + #define MTD_ABSENT 0 #define MTD_RAM 1 #define MTD_ROM 2 @@ -101,6 +108,8 @@ struct otp_info { #define ECCGETSTATS _IOR('M', 18, struct mtd_ecc_stats) #define MTDFILEMODE _IO('M', 19) #define MEMERASE64 _IOW('M', 20, struct erase_info_user64) +#define MEMWRITEOOB64 _IOWR('M', 21, struct mtd_oob_buf64) +#define MEMREADOOB64 _IOWR('M', 22, struct mtd_oob_buf64) /* * Obsolete legacy interface. Keep it in order not to break userspace -- cgit v1.2.3-70-g09d2 From ccffad25b5136958d4769ed6de5e87992dd9c65c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 22 May 2009 23:22:17 +0000 Subject: net: convert unicast addr list This patch converts unicast address list to standard list_head using previously introduced struct netdev_hw_addr. It also relaxes the locking. Original spinlock (still used for multicast addresses) is not needed and is no longer used for a protection of this list. All reading and writing takes place under rtnl (with no changes). I also removed a possibility to specify the length of the address while adding or deleting unicast address. It's always dev->addr_len. The convertion touched especially e1000 and ixgbe codes when the change is not so trivial. Signed-off-by: Jiri Pirko drivers/net/bnx2.c | 13 +-- drivers/net/e1000/e1000_main.c | 24 +++-- drivers/net/ixgbe/ixgbe_common.c | 14 ++-- drivers/net/ixgbe/ixgbe_common.h | 4 +- drivers/net/ixgbe/ixgbe_main.c | 6 +- drivers/net/ixgbe/ixgbe_type.h | 4 +- drivers/net/macvlan.c | 11 +- drivers/net/mv643xx_eth.c | 11 +- drivers/net/niu.c | 7 +- drivers/net/virtio_net.c | 7 +- drivers/s390/net/qeth_l2_main.c | 6 +- drivers/scsi/fcoe/fcoe.c | 16 ++-- include/linux/netdevice.h | 18 ++-- net/8021q/vlan.c | 4 +- net/8021q/vlan_dev.c | 10 +- net/core/dev.c | 195 +++++++++++++++++++++++++++----------- net/dsa/slave.c | 10 +- net/packet/af_packet.c | 4 +- 18 files changed, 227 insertions(+), 137 deletions(-) Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 13 ++- drivers/net/e1000/e1000_main.c | 24 +++-- drivers/net/ixgbe/ixgbe_common.c | 14 +-- drivers/net/ixgbe/ixgbe_common.h | 4 +- drivers/net/ixgbe/ixgbe_main.c | 6 +- drivers/net/ixgbe/ixgbe_type.h | 4 +- drivers/net/macvlan.c | 11 ++- drivers/net/mv643xx_eth.c | 11 ++- drivers/net/niu.c | 7 +- drivers/net/virtio_net.c | 7 +- drivers/s390/net/qeth_l2_main.c | 6 +- drivers/scsi/fcoe/fcoe.c | 16 ++-- include/linux/netdevice.h | 18 ++-- net/8021q/vlan.c | 4 +- net/8021q/vlan_dev.c | 10 +- net/core/dev.c | 195 +++++++++++++++++++++++++++------------ net/dsa/slave.c | 10 +- net/packet/af_packet.c | 4 +- 18 files changed, 227 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 83ee0f53f2d..f53017250e0 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "bnx2.h" #include "bnx2_fw.h" @@ -3310,7 +3311,7 @@ bnx2_set_rx_mode(struct net_device *dev) { struct bnx2 *bp = netdev_priv(dev); u32 rx_mode, sort_mode; - struct dev_addr_list *uc_ptr; + struct netdev_hw_addr *ha; int i; if (!netif_running(dev)) @@ -3369,21 +3370,19 @@ bnx2_set_rx_mode(struct net_device *dev) sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN; } - uc_ptr = NULL; if (dev->uc_count > BNX2_MAX_UNICAST_ADDRESSES) { rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS; sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN | BNX2_RPM_SORT_USER0_PROM_VLAN; } else if (!(dev->flags & IFF_PROMISC)) { - uc_ptr = dev->uc_list; - /* Add all entries into to the match filter list */ - for (i = 0; i < dev->uc_count; i++) { - bnx2_set_mac_addr(bp, uc_ptr->da_addr, + i = 0; + list_for_each_entry(ha, &dev->uc_list, list) { + bnx2_set_mac_addr(bp, ha->addr, i + BNX2_START_UNICAST_ADDRESS_INDEX); sort_mode |= (1 << (i + BNX2_START_UNICAST_ADDRESS_INDEX)); - uc_ptr = uc_ptr->next; + i++; } } diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 79fe1ee3da5..74667e52143 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2330,7 +2330,8 @@ static void e1000_set_rx_mode(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - struct dev_addr_list *uc_ptr; + struct netdev_hw_addr *ha; + bool use_uc = false; struct dev_addr_list *mc_ptr; u32 rctl; u32 hash_value; @@ -2369,12 +2370,11 @@ static void e1000_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_VFE; } - uc_ptr = NULL; if (netdev->uc_count > rar_entries - 1) { rctl |= E1000_RCTL_UPE; } else if (!(netdev->flags & IFF_PROMISC)) { rctl &= ~E1000_RCTL_UPE; - uc_ptr = netdev->uc_list; + use_uc = true; } ew32(RCTL, rctl); @@ -2392,13 +2392,20 @@ static void e1000_set_rx_mode(struct net_device *netdev) * if there are not 14 addresses, go ahead and clear the filters * -- with 82571 controllers only 0-13 entries are filled here */ + i = 1; + if (use_uc) + list_for_each_entry(ha, &netdev->uc_list, list) { + if (i == rar_entries) + break; + e1000_rar_set(hw, ha->addr, i++); + } + + WARN_ON(i == rar_entries); + mc_ptr = netdev->mc_list; - for (i = 1; i < rar_entries; i++) { - if (uc_ptr) { - e1000_rar_set(hw, uc_ptr->da_addr, i); - uc_ptr = uc_ptr->next; - } else if (mc_ptr) { + for (; i < rar_entries; i++) { + if (mc_ptr) { e1000_rar_set(hw, mc_ptr->da_addr, i); mc_ptr = mc_ptr->next; } else { @@ -2408,7 +2415,6 @@ static void e1000_set_rx_mode(struct net_device *netdev) E1000_WRITE_FLUSH(); } } - WARN_ON(uc_ptr != NULL); /* load any remaining addresses into the hash table */ diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 0cc3c47cb45..6f79409270a 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include "ixgbe.h" #include "ixgbe_common.h" @@ -1356,15 +1358,14 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) * Drivers using secondary unicast addresses must set user_set_promisc when * manually putting the device into promiscuous mode. **/ -s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr next) +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, + struct list_head *uc_list) { - u8 *addr; u32 i; u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; u32 uc_addr_in_use; u32 fctrl; - u32 vmdq; + struct netdev_hw_addr *ha; /* * Clear accounting of old secondary address list, @@ -1382,10 +1383,9 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, } /* Add the new addresses */ - for (i = 0; i < addr_count; i++) { + list_for_each_entry(ha, uc_list, list) { hw_dbg(hw, " Adding the secondary addresses:\n"); - addr = next(hw, &addr_list, &vmdq); - ixgbe_add_uc_addr(hw, addr, vmdq); + ixgbe_add_uc_addr(hw, ha->addr, 0); } if (hw->addr_ctrl.overflow_promisc) { diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h index dd260890ad0..b2a4b2c99c4 100644 --- a/drivers/net/ixgbe/ixgbe_common.h +++ b/drivers/net/ixgbe/ixgbe_common.h @@ -59,8 +59,8 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw); s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr func); -s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, - u32 addr_count, ixgbe_mc_addr_itr func); +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, + struct list_head *uc_list); s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 924aa5ed02c..de70a2df9ae 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2181,11 +2181,7 @@ static void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); /* reprogram secondary unicast list */ - addr_count = netdev->uc_count; - if (addr_count) - addr_list = netdev->uc_list->dmi_addr; - hw->mac.ops.update_uc_addr_list(hw, addr_list, addr_count, - ixgbe_addr_list_itr); + hw->mac.ops.update_uc_addr_list(hw, &netdev->uc_list); /* reprogram multicast list */ addr_count = netdev->mc_count; diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index df1f7034c28..a8a8243d8fd 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -30,6 +30,7 @@ #include #include +#include /* Vendor ID */ #define IXGBE_INTEL_VENDOR_ID 0x8086 @@ -2223,8 +2224,7 @@ struct ixgbe_mac_operations { s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*init_rx_addrs)(struct ixgbe_hw *); - s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32, - ixgbe_mc_addr_itr); + s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct list_head *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, ixgbe_mc_addr_itr); s32 (*enable_mc)(struct ixgbe_hw *); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d5334b41e4b..021d9941c29 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -232,7 +232,7 @@ static int macvlan_open(struct net_device *dev) if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; - err = dev_unicast_add(lowerdev, dev->dev_addr, ETH_ALEN); + err = dev_unicast_add(lowerdev, dev->dev_addr); if (err < 0) goto out; if (dev->flags & IFF_ALLMULTI) { @@ -244,7 +244,7 @@ static int macvlan_open(struct net_device *dev) return 0; del_unicast: - dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(lowerdev, dev->dev_addr); out: return err; } @@ -258,7 +258,7 @@ static int macvlan_stop(struct net_device *dev) if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); - dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(lowerdev, dev->dev_addr); macvlan_hash_del(vlan); return 0; @@ -282,10 +282,11 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) if (macvlan_addr_busy(vlan->port, addr->sa_data)) return -EBUSY; - if ((err = dev_unicast_add(lowerdev, addr->sa_data, ETH_ALEN))) + err = dev_unicast_add(lowerdev, addr->sa_data); + if (err) return err; - dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(lowerdev, dev->dev_addr); macvlan_hash_change_addr(vlan, addr->sa_data); } diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 1361ddc8d31..b4e18a58cb1 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -55,6 +55,7 @@ #include #include #include +#include static char mv643xx_eth_driver_name[] = "mv643xx_eth"; static char mv643xx_eth_driver_version[] = "1.4"; @@ -1721,20 +1722,20 @@ static void uc_addr_set(struct mv643xx_eth_private *mp, unsigned char *addr) static u32 uc_addr_filter_mask(struct net_device *dev) { - struct dev_addr_list *uc_ptr; + struct netdev_hw_addr *ha; u32 nibbles; if (dev->flags & IFF_PROMISC) return 0; nibbles = 1 << (dev->dev_addr[5] & 0x0f); - for (uc_ptr = dev->uc_list; uc_ptr != NULL; uc_ptr = uc_ptr->next) { - if (memcmp(dev->dev_addr, uc_ptr->da_addr, 5)) + list_for_each_entry(ha, &dev->uc_list, list) { + if (memcmp(dev->dev_addr, ha->addr, 5)) return 0; - if ((dev->dev_addr[5] ^ uc_ptr->da_addr[5]) & 0xf0) + if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0) return 0; - nibbles |= 1 << (uc_ptr->da_addr[5] & 0x0f); + nibbles |= 1 << (ha->addr[5] & 0x0f); } return nibbles; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index edac3a0b02d..fa61a12c5e1 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -6362,6 +6363,7 @@ static void niu_set_rx_mode(struct net_device *dev) struct niu *np = netdev_priv(dev); int i, alt_cnt, err; struct dev_addr_list *addr; + struct netdev_hw_addr *ha; unsigned long flags; u16 hash[16] = { 0, }; @@ -6383,9 +6385,8 @@ static void niu_set_rx_mode(struct net_device *dev) if (alt_cnt) { int index = 0; - for (addr = dev->uc_list; addr; addr = addr->next) { - err = niu_set_alt_mac(np, index, - addr->da_addr); + list_for_each_entry(ha, &dev->uc_list, list) { + err = niu_set_alt_mac(np, index, ha->addr); if (err) printk(KERN_WARNING PFX "%s: Error %d " "adding alt mac %d\n", diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6cc5bcd34fb..0c9ca67f66e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -680,6 +680,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) u8 promisc, allmulti; struct virtio_net_ctrl_mac *mac_data; struct dev_addr_list *addr; + struct netdev_hw_addr *ha; void *buf; int i; @@ -718,9 +719,9 @@ static void virtnet_set_rx_mode(struct net_device *dev) /* Store the unicast list and count in the front of the buffer */ mac_data->entries = dev->uc_count; - addr = dev->uc_list; - for (i = 0; i < dev->uc_count; i++, addr = addr->next) - memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN); + i = 0; + list_for_each_entry(ha, &dev->uc_list, list) + memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN)); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9ca6bab7c9b..ecd3d06c0d5 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "qeth_core.h" @@ -640,6 +641,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; struct dev_addr_list *dm; + struct netdev_hw_addr *ha; if (card->info.type == QETH_CARD_TYPE_OSN) return ; @@ -653,8 +655,8 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) for (dm = dev->mc_list; dm; dm = dm->next) qeth_l2_add_mc(card, dm->da_addr, 0); - for (dm = dev->uc_list; dm; dm = dm->next) - qeth_l2_add_mc(card, dm->da_addr, 1); + list_for_each_entry(ha, &dev->uc_list, list) + qeth_l2_add_mc(card, ha->addr, 1); spin_unlock_bh(&card->mclock); if (!qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE)) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ce33f107b0a..f791348871f 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -182,8 +182,8 @@ static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new) fc = fcoe_from_ctlr(fip); rtnl_lock(); if (!is_zero_ether_addr(old)) - dev_unicast_delete(fc->real_dev, old, ETH_ALEN); - dev_unicast_add(fc->real_dev, new, ETH_ALEN); + dev_unicast_delete(fc->real_dev, old); + dev_unicast_add(fc->real_dev, new); rtnl_unlock(); } @@ -233,13 +233,11 @@ void fcoe_netdev_cleanup(struct fcoe_softc *fc) /* Delete secondary MAC addresses */ rtnl_lock(); memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN); - dev_unicast_delete(fc->real_dev, flogi_maddr, ETH_ALEN); + dev_unicast_delete(fc->real_dev, flogi_maddr); if (!is_zero_ether_addr(fc->ctlr.data_src_addr)) - dev_unicast_delete(fc->real_dev, - fc->ctlr.data_src_addr, ETH_ALEN); + dev_unicast_delete(fc->real_dev, fc->ctlr.data_src_addr); if (fc->ctlr.spma) - dev_unicast_delete(fc->real_dev, - fc->ctlr.ctl_src_addr, ETH_ALEN); + dev_unicast_delete(fc->real_dev, fc->ctlr.ctl_src_addr); dev_mc_delete(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0); rtnl_unlock(); } @@ -347,9 +345,9 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev) */ rtnl_lock(); memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN); - dev_unicast_add(fc->real_dev, flogi_maddr, ETH_ALEN); + dev_unicast_add(fc->real_dev, flogi_maddr); if (fc->ctlr.spma) - dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr, ETH_ALEN); + dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr); dev_mc_add(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0); rtnl_unlock(); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1eaf5ae14fe..bbfabf3012b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -215,9 +215,12 @@ struct netdev_hw_addr { struct list_head list; unsigned char addr[MAX_ADDR_LEN]; unsigned char type; -#define NETDEV_HW_ADDR_T_LAN 1 -#define NETDEV_HW_ADDR_T_SAN 2 -#define NETDEV_HW_ADDR_T_SLAVE 3 +#define NETDEV_HW_ADDR_T_LAN 1 +#define NETDEV_HW_ADDR_T_SAN 2 +#define NETDEV_HW_ADDR_T_SLAVE 3 +#define NETDEV_HW_ADDR_T_UNICAST 4 + int refcount; + bool synced; struct rcu_head rcu_head; }; @@ -773,10 +776,11 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ - spinlock_t addr_list_lock; - struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ + struct list_head uc_list; /* Secondary unicast mac + addresses */ int uc_count; /* Number of installed ucasts */ int uc_promisc; + spinlock_t addr_list_lock; struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ unsigned int promiscuity; @@ -1836,8 +1840,8 @@ extern int dev_addr_del_multiple(struct net_device *to_dev, /* Functions used for secondary unicast and multicast support */ extern void dev_set_rx_mode(struct net_device *dev); extern void __dev_set_rx_mode(struct net_device *dev); -extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); -extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); +extern int dev_unicast_delete(struct net_device *dev, void *addr); +extern int dev_unicast_add(struct net_device *dev, void *addr); extern int dev_unicast_sync(struct net_device *to, struct net_device *from); extern void dev_unicast_unsync(struct net_device *to, struct net_device *from); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index d1e10546eb8..714e1c3536b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -378,13 +378,13 @@ static void vlan_sync_address(struct net_device *dev, * the new address */ if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && !compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) - dev_unicast_delete(dev, vlandev->dev_addr, ETH_ALEN); + dev_unicast_delete(dev, vlandev->dev_addr); /* vlan address was equal to the old address and is different from * the new address */ if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) - dev_unicast_add(dev, vlandev->dev_addr, ETH_ALEN); + dev_unicast_add(dev, vlandev->dev_addr); memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1e2ad4c7c59..96bad8f233e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -441,7 +441,7 @@ static int vlan_dev_open(struct net_device *dev) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { - err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN); + err = dev_unicast_add(real_dev, dev->dev_addr); if (err < 0) goto out; } @@ -470,7 +470,7 @@ clear_allmulti: dev_set_allmulti(real_dev, -1); del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(real_dev, dev->dev_addr); out: netif_carrier_off(dev); return err; @@ -492,7 +492,7 @@ static int vlan_dev_stop(struct net_device *dev) dev_set_promiscuity(real_dev, -1); if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + dev_unicast_delete(real_dev, dev->dev_addr); netif_carrier_off(dev); return 0; @@ -511,13 +511,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) goto out; if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) { - err = dev_unicast_add(real_dev, addr->sa_data, ETH_ALEN); + err = dev_unicast_add(real_dev, addr->sa_data); if (err < 0) return err; } if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) - dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(real_dev, dev->dev_addr); out: memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); diff --git a/net/core/dev.c b/net/core/dev.c index 32ceee17896..e2fcc5f1017 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3473,8 +3473,9 @@ void dev_set_rx_mode(struct net_device *dev) /* hw addresses list handling functions */ -static int __hw_addr_add(struct list_head *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_add(struct list_head *list, int *delta, + unsigned char *addr, int addr_len, + unsigned char addr_type) { struct netdev_hw_addr *ha; int alloc_size; @@ -3482,6 +3483,15 @@ static int __hw_addr_add(struct list_head *list, unsigned char *addr, if (addr_len > MAX_ADDR_LEN) return -EINVAL; + list_for_each_entry(ha, list, list) { + if (!memcmp(ha->addr, addr, addr_len) && + ha->type == addr_type) { + ha->refcount++; + return 0; + } + } + + alloc_size = sizeof(*ha); if (alloc_size < L1_CACHE_BYTES) alloc_size = L1_CACHE_BYTES; @@ -3490,7 +3500,11 @@ static int __hw_addr_add(struct list_head *list, unsigned char *addr, return -ENOMEM; memcpy(ha->addr, addr, addr_len); ha->type = addr_type; + ha->refcount = 1; + ha->synced = false; list_add_tail_rcu(&ha->list, list); + if (delta) + (*delta)++; return 0; } @@ -3502,29 +3516,30 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr, - int addr_len, unsigned char addr_type, - int ignore_index) +static int __hw_addr_del(struct list_head *list, int *delta, + unsigned char *addr, int addr_len, + unsigned char addr_type) { struct netdev_hw_addr *ha; - int i = 0; list_for_each_entry(ha, list, list) { - if (i++ != ignore_index && - !memcmp(ha->addr, addr, addr_len) && + if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { + if (--ha->refcount) + return 0; list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); + if (delta) + (*delta)--; return 0; } } return -ENOENT; } -static int __hw_addr_add_multiple_ii(struct list_head *to_list, - struct list_head *from_list, - int addr_len, unsigned char addr_type, - int ignore_index) +static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int addr_len, + unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; @@ -3532,7 +3547,8 @@ static int __hw_addr_add_multiple_ii(struct list_head *to_list, list_for_each_entry(ha, from_list, list) { type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, ha->addr, addr_len, type); + err = __hw_addr_add(to_list, to_delta, ha->addr, + addr_len, type); if (err) goto unroll; } @@ -3543,27 +3559,69 @@ unroll: if (ha2 == ha) break; type = addr_type ? addr_type : ha2->type; - __hw_addr_del_ii(to_list, ha2->addr, addr_len, type, - ignore_index); + __hw_addr_del(to_list, to_delta, ha2->addr, + addr_len, type); } return err; } -static void __hw_addr_del_multiple_ii(struct list_head *to_list, - struct list_head *from_list, - int addr_len, unsigned char addr_type, - int ignore_index) +static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int addr_len, + unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; list_for_each_entry(ha, from_list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type, - ignore_index); + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, addr_type); + } +} + +static int __hw_addr_sync(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int *from_delta, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, from_list, list) { + if (!ha->synced) { + err = __hw_addr_add(to_list, to_delta, ha->addr, + addr_len, ha->type); + if (err) + break; + ha->synced = true; + ha->refcount++; + } else if (ha->refcount == 1) { + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, ha->type); + __hw_addr_del(from_list, from_delta, ha->addr, + addr_len, ha->type); + } } + return err; } +static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int *from_delta, + int addr_len) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, from_list, list) { + if (ha->synced) { + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, ha->type); + ha->synced = false; + __hw_addr_del(from_list, from_delta, ha->addr, + addr_len, ha->type); + } + } +} + + static void __hw_addr_flush(struct list_head *list) { struct netdev_hw_addr *ha, *tmp; @@ -3594,7 +3652,7 @@ static int dev_addr_init(struct net_device *dev) INIT_LIST_HEAD(&dev->dev_addr_list); memset(addr, 0, sizeof(*addr)); - err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr), + err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(*addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* @@ -3626,7 +3684,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr, ASSERT_RTNL(); - err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len, + err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -3649,11 +3707,20 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr, unsigned char addr_type) { int err; + struct netdev_hw_addr *ha; ASSERT_RTNL(); - err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len, - addr_type, 0); + /* + * We can not remove the first address from the list because + * dev->dev_addr points to that. + */ + ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + if (ha->addr == dev->dev_addr && ha->refcount == 1) + return -ENOENT; + + err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3680,9 +3747,9 @@ int dev_addr_add_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list, - &from_dev->dev_addr_list, - to_dev->addr_len, addr_type, 0); + err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return err; @@ -3707,9 +3774,9 @@ int dev_addr_del_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - __hw_addr_del_multiple_ii(&to_dev->dev_addr_list, - &from_dev->dev_addr_list, - to_dev->addr_len, addr_type, 0); + __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type); call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return 0; } @@ -3779,24 +3846,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, * dev_unicast_delete - Release secondary unicast address. * @dev: device * @addr: address to delete - * @alen: length of @addr * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ -int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +int dev_unicast_delete(struct net_device *dev, void *addr) { int err; ASSERT_RTNL(); - netif_addr_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, + dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -3805,24 +3870,22 @@ EXPORT_SYMBOL(dev_unicast_delete); * dev_unicast_add - add a secondary unicast address * @dev: device * @addr: address to add - * @alen: length of @addr * * Add a secondary unicast address to the device or increase * the reference count if it already exists. * * The caller must hold the rtnl_mutex. */ -int dev_unicast_add(struct net_device *dev, void *addr, int alen) +int dev_unicast_add(struct net_device *dev, void *addr) { int err; ASSERT_RTNL(); - netif_addr_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, + dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3879,8 +3942,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, * @from: source device * * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. + * addresses that have no users left. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -3889,12 +3951,15 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_addr_lock_bh(to); - err = __dev_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); + ASSERT_RTNL(); + + if (to->addr_len != from->addr_len) + return -EINVAL; + + err = __hw_addr_sync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count, to->addr_len); if (!err) __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3910,18 +3975,33 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_addr_lock_bh(from); - netif_addr_lock(to); + ASSERT_RTNL(); - __dev_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - __dev_set_rx_mode(to); + if (to->addr_len != from->addr_len) + return; - netif_addr_unlock(to); - netif_addr_unlock_bh(from); + __hw_addr_unsync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count, to->addr_len); + __dev_set_rx_mode(to); } EXPORT_SYMBOL(dev_unicast_unsync); +static void dev_unicast_flush(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + __hw_addr_flush(&dev->uc_list); + dev->uc_count = 0; +} + +static void dev_unicast_init(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + INIT_LIST_HEAD(&dev->uc_list); +} + + static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; @@ -3940,9 +4020,6 @@ static void dev_addr_discard(struct net_device *dev) { netif_addr_lock_bh(dev); - __dev_addr_discard(&dev->uc_list); - dev->uc_count = 0; - __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; @@ -4535,6 +4612,7 @@ static void rollback_registered(struct net_device *dev) /* * Flush the unicast and multicast chains */ + dev_unicast_flush(dev); dev_addr_discard(dev); if (dev->netdev_ops->ndo_uninit) @@ -5020,6 +5098,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, if (dev_addr_init(dev)) goto free_tx; + dev_unicast_init(dev); + dev_net_set(dev, &init_net); dev->_tx = tx; @@ -5223,6 +5303,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* * Flush the unicast and multicast chains */ + dev_unicast_flush(dev); dev_addr_discard(dev); netdev_unregister_kobject(dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ed131181215..2175e6d5cc8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { - err = dev_unicast_add(master, dev->dev_addr, ETH_ALEN); + err = dev_unicast_add(master, dev->dev_addr); if (err < 0) goto out; } @@ -90,7 +90,7 @@ clear_allmulti: dev_set_allmulti(master, -1); del_unicast: if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(master, dev->dev_addr); out: return err; } @@ -108,7 +108,7 @@ static int dsa_slave_close(struct net_device *dev) dev_set_promiscuity(master, -1); if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(master, dev->dev_addr); return 0; } @@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) goto out; if (compare_ether_addr(addr->sa_data, master->dev_addr)) { - err = dev_unicast_add(master, addr->sa_data, ETH_ALEN); + err = dev_unicast_add(master, addr->sa_data); if (err < 0) return err; } if (compare_ether_addr(dev->dev_addr, master->dev_addr)) - dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + dev_unicast_delete(master, dev->dev_addr); out: memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c7c5d524967..6da9f38ef5c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1582,9 +1582,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, break; case PACKET_MR_UNICAST: if (what > 0) - return dev_unicast_add(dev, i->addr, i->alen); + return dev_unicast_add(dev, i->addr); else - return dev_unicast_delete(dev, i->addr, i->alen); + return dev_unicast_delete(dev, i->addr); break; default:; } -- cgit v1.2.3-70-g09d2 From 25346b93ca079080c9cb23331db5c4f6404e8530 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:48:12 +1000 Subject: perf_counter: Provide functions for locking and pinning the context for a task This abstracts out the code for locking the context associated with a task. Because the context might get transferred from one task to another concurrently, we have to check after locking the context that it is still the right context for the task and retry if not. This was open-coded in find_get_context() and perf_counter_init_task(). This adds a further function for pinning the context for a task, i.e. marking it so it can't be transferred to another task. This adds a 'pin_count' field to struct perf_counter_context to indicate that a context is pinned, instead of the previous method of setting the parent_gen count to all 1s. Pinning the context with a pin_count is easier to undo and doesn't require saving the parent_gen value. This also adds a perf_unpin_context() to undo the effect of perf_pin_task_context() and changes perf_counter_init_task to use it. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.34748.755674.596386@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 128 +++++++++++++++++++++++++------------------ 2 files changed, 75 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 519a41bba24..81ec79c9f19 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -543,6 +543,7 @@ struct perf_counter_context { struct perf_counter_context *parent_ctx; u64 parent_gen; u64 generation; + int pin_count; struct rcu_head rcu_head; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 79c3f26541d..da8dfef4b47 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -122,6 +122,69 @@ static void put_ctx(struct perf_counter_context *ctx) } } +/* + * Get the perf_counter_context for a task and lock it. + * This has to cope with with the fact that until it is locked, + * the context could get moved to another task. + */ +static struct perf_counter_context *perf_lock_task_context( + struct task_struct *task, unsigned long *flags) +{ + struct perf_counter_context *ctx; + + rcu_read_lock(); + retry: + ctx = rcu_dereference(task->perf_counter_ctxp); + if (ctx) { + /* + * If this context is a clone of another, it might + * get swapped for another underneath us by + * perf_counter_task_sched_out, though the + * rcu_read_lock() protects us from any context + * getting freed. Lock the context and check if it + * got swapped before we could get the lock, and retry + * if so. If we locked the right context, then it + * can't get swapped on us any more. + */ + spin_lock_irqsave(&ctx->lock, *flags); + if (ctx != rcu_dereference(task->perf_counter_ctxp)) { + spin_unlock_irqrestore(&ctx->lock, *flags); + goto retry; + } + } + rcu_read_unlock(); + return ctx; +} + +/* + * Get the context for a task and increment its pin_count so it + * can't get swapped to another task. This also increments its + * reference count so that the context can't get freed. + */ +static struct perf_counter_context *perf_pin_task_context(struct task_struct *task) +{ + struct perf_counter_context *ctx; + unsigned long flags; + + ctx = perf_lock_task_context(task, &flags); + if (ctx) { + ++ctx->pin_count; + get_ctx(ctx); + spin_unlock_irqrestore(&ctx->lock, flags); + } + return ctx; +} + +static void perf_unpin_context(struct perf_counter_context *ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->lock, flags); + --ctx->pin_count; + spin_unlock_irqrestore(&ctx->lock, flags); + put_ctx(ctx); +} + /* * Add a counter from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. @@ -916,7 +979,7 @@ static int context_equiv(struct perf_counter_context *ctx1, { return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && ctx1->parent_gen == ctx2->parent_gen - && ctx1->parent_gen != ~0ull; + && !ctx1->pin_count && !ctx2->pin_count; } /* @@ -1271,6 +1334,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) struct perf_counter_context *ctx; struct perf_counter_context *parent_ctx; struct task_struct *task; + unsigned long flags; int err; /* @@ -1323,28 +1387,9 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; - retry_lock: - rcu_read_lock(); retry: - ctx = rcu_dereference(task->perf_counter_ctxp); + ctx = perf_lock_task_context(task, &flags); if (ctx) { - /* - * If this context is a clone of another, it might - * get swapped for another underneath us by - * perf_counter_task_sched_out, though the - * rcu_read_lock() protects us from any context - * getting freed. Lock the context and check if it - * got swapped before we could get the lock, and retry - * if so. If we locked the right context, then it - * can't get swapped on us any more and we can - * unclone it if necessary. - * Once it's not a clone things will be stable. - */ - spin_lock_irq(&ctx->lock); - if (ctx != rcu_dereference(task->perf_counter_ctxp)) { - spin_unlock_irq(&ctx->lock); - goto retry; - } parent_ctx = ctx->parent_ctx; if (parent_ctx) { put_ctx(parent_ctx); @@ -1355,9 +1400,8 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) * this context won't get freed if the task exits. */ get_ctx(ctx); - spin_unlock_irq(&ctx->lock); + spin_unlock_irqrestore(&ctx->lock, flags); } - rcu_read_unlock(); if (!ctx) { ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); @@ -1372,7 +1416,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) * the context they set. */ kfree(ctx); - goto retry_lock; + goto retry; } get_task_struct(task); } @@ -3667,7 +3711,6 @@ int perf_counter_init_task(struct task_struct *child) struct perf_counter *counter; struct task_struct *parent = current; int inherited_all = 1; - u64 cloned_gen; int ret = 0; child->perf_counter_ctxp = NULL; @@ -3693,32 +3736,17 @@ int perf_counter_init_task(struct task_struct *child) get_task_struct(child); /* - * If the parent's context is a clone, temporarily set its - * parent_gen to an impossible value (all 1s) so it won't get - * swapped under us. The rcu_read_lock makes sure that - * parent_ctx continues to exist even if it gets swapped to - * another process and then freed while we are trying to get - * its lock. + * If the parent's context is a clone, pin it so it won't get + * swapped under us. */ - rcu_read_lock(); - retry: - parent_ctx = rcu_dereference(parent->perf_counter_ctxp); + parent_ctx = perf_pin_task_context(parent); + /* * No need to check if parent_ctx != NULL here; since we saw * it non-NULL earlier, the only reason for it to become NULL * is if we exit, and since we're currently in the middle of * a fork we can't be exiting at the same time. */ - spin_lock_irq(&parent_ctx->lock); - if (parent_ctx != rcu_dereference(parent->perf_counter_ctxp)) { - spin_unlock_irq(&parent_ctx->lock); - goto retry; - } - cloned_gen = parent_ctx->parent_gen; - if (parent_ctx->parent_ctx) - parent_ctx->parent_gen = ~0ull; - spin_unlock_irq(&parent_ctx->lock); - rcu_read_unlock(); /* * Lock the parent list. No need to lock the child - not PID @@ -3759,7 +3787,7 @@ int perf_counter_init_task(struct task_struct *child) cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); if (cloned_ctx) { child_ctx->parent_ctx = cloned_ctx; - child_ctx->parent_gen = cloned_gen; + child_ctx->parent_gen = parent_ctx->parent_gen; } else { child_ctx->parent_ctx = parent_ctx; child_ctx->parent_gen = parent_ctx->generation; @@ -3769,15 +3797,7 @@ int perf_counter_init_task(struct task_struct *child) mutex_unlock(&parent_ctx->mutex); - /* - * Restore the clone status of the parent. - */ - if (parent_ctx->parent_ctx) { - spin_lock_irq(&parent_ctx->lock); - if (parent_ctx->parent_ctx) - parent_ctx->parent_gen = cloned_gen; - spin_unlock_irq(&parent_ctx->lock); - } + perf_unpin_context(parent_ctx); return ret; } -- cgit v1.2.3-70-g09d2 From d1a277c584d0862dbf51991baea947ea5f2ce6bf Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Sat, 30 May 2009 07:55:50 +0000 Subject: can: sja1000: generic OF platform bus driver This patch adds a generic driver for SJA1000 chips on the OpenFirmware platform bus found on embedded PowerPC systems. You need a SJA1000 node definition in your flattened device tree source (DTS) file similar to: can@3,100 { compatible = "nxp,sja1000"; reg = <3 0x100 0x80>; interrupts = <2 0>; interrupt-parent = <&mpic>; nxp,external-clock-frequency = <16000000>; }; See also Documentation/powerpc/dts-bindings/can/sja1000.txt. CC: devicetree-discuss@ozlabs.org Signed-off-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- Documentation/powerpc/dts-bindings/can/sja1000.txt | 53 +++++ drivers/net/can/Kconfig | 9 + drivers/net/can/sja1000/Makefile | 1 + drivers/net/can/sja1000/sja1000_of_platform.c | 233 +++++++++++++++++++++ include/linux/can/platform/sja1000.h | 3 + 5 files changed, 299 insertions(+) create mode 100644 Documentation/powerpc/dts-bindings/can/sja1000.txt create mode 100644 drivers/net/can/sja1000/sja1000_of_platform.c (limited to 'include') diff --git a/Documentation/powerpc/dts-bindings/can/sja1000.txt b/Documentation/powerpc/dts-bindings/can/sja1000.txt new file mode 100644 index 00000000000..d6d209ded93 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/can/sja1000.txt @@ -0,0 +1,53 @@ +Memory mapped SJA1000 CAN controller from NXP (formerly Philips) + +Required properties: + +- compatible : should be "nxp,sja1000". + +- reg : should specify the chip select, address offset and size required + to map the registers of the SJA1000. The size is usually 0x80. + +- interrupts: property with a value describing the interrupt source + (number and sensitivity) required for the SJA1000. + +Optional properties: + +- nxp,external-clock-frequency : Frequency of the external oscillator + clock in Hz. Note that the internal clock frequency used by the + SJA1000 is half of that value. If not specified, a default value + of 16000000 (16 MHz) is used. + +- nxp,tx-output-mode : operation mode of the TX output control logic: + <0x0> : bi-phase output mode + <0x1> : normal output mode (default) + <0x2> : test output mode + <0x3> : clock output mode + +- nxp,tx-output-config : TX output pin configuration: + <0x01> : TX0 invert + <0x02> : TX0 pull-down (default) + <0x04> : TX0 pull-up + <0x06> : TX0 push-pull + <0x08> : TX1 invert + <0x10> : TX1 pull-down + <0x20> : TX1 pull-up + <0x30> : TX1 push-pull + +- nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin. + If not specified or if the specified value is 0, the CLKOUT pin + will be disabled. + +- nxp,no-comparator-bypass : Allows to disable the CAN input comperator. + +For futher information, please have a look to the SJA1000 data sheet. + +Examples: + +can@3,100 { + compatible = "nxp,sja1000"; + reg = <3 0x100 0x80>; + interrupts = <2 0>; + interrupt-parent = <&mpic>; + nxp,external-clock-frequency = <16000000>; +}; + diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index cfd6c5a285f..d5e18812bf4 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -51,6 +51,15 @@ config CAN_SJA1000_PLATFORM boards from Phytec (http://www.phytec.de) like the PCM027, PCM038. +config CAN_SJA1000_OF_PLATFORM + depends on CAN_SJA1000 && PPC_OF + tristate "Generic OF Platform Bus based SJA1000 driver" + ---help--- + This driver adds support for the SJA1000 chips connected to + the OpenFirmware "platform bus" found on embedded systems with + OpenFirmware bindings, e.g. if you have a PowerPC based system + you may want to enable this option. + config CAN_EMS_PCI tristate "EMS CPC-PCI and CPC-PCIe Card" depends on PCI && CAN_SJA1000 diff --git a/drivers/net/can/sja1000/Makefile b/drivers/net/can/sja1000/Makefile index d6c631f9e66..9d0c08da273 100644 --- a/drivers/net/can/sja1000/Makefile +++ b/drivers/net/can/sja1000/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CAN_SJA1000) += sja1000.o obj-$(CONFIG_CAN_SJA1000_PLATFORM) += sja1000_platform.o +obj-$(CONFIG_CAN_SJA1000_OF_PLATFORM) += sja1000_of_platform.o obj-$(CONFIG_CAN_EMS_PCI) += ems_pci.o obj-$(CONFIG_CAN_KVASER_PCI) += kvaser_pci.o diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c new file mode 100644 index 00000000000..aa953fb4b8d --- /dev/null +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -0,0 +1,233 @@ +/* + * Driver for SJA1000 CAN controllers on the OpenFirmware platform bus + * + * Copyright (C) 2008-2009 Wolfgang Grandegger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* This is a generic driver for SJA1000 chips on the OpenFirmware platform + * bus found on embedded PowerPC systems. You need a SJA1000 CAN node + * definition in your flattened device tree source (DTS) file similar to: + * + * can@3,100 { + * compatible = "nxp,sja1000"; + * reg = <3 0x100 0x80>; + * interrupts = <2 0>; + * interrupt-parent = <&mpic>; + * nxp,external-clock-frequency = <16000000>; + * }; + * + * See "Documentation/powerpc/dts-bindings/can/sja1000.txt" for further + * information. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "sja1000.h" + +#define DRV_NAME "sja1000_of_platform" + +MODULE_AUTHOR("Wolfgang Grandegger "); +MODULE_DESCRIPTION("Socket-CAN driver for SJA1000 on the OF platform bus"); +MODULE_LICENSE("GPL v2"); + +#define SJA1000_OFP_CAN_CLOCK (16000000 / 2) + +#define SJA1000_OFP_OCR OCR_TX0_PULLDOWN +#define SJA1000_OFP_CDR (CDR_CBP | CDR_CLK_OFF) + +static u8 sja1000_ofp_read_reg(const struct sja1000_priv *priv, int reg) +{ + return in_8(priv->reg_base + reg); +} + +static void sja1000_ofp_write_reg(const struct sja1000_priv *priv, + int reg, u8 val) +{ + out_8(priv->reg_base + reg, val); +} + +static int __devexit sja1000_ofp_remove(struct of_device *ofdev) +{ + struct net_device *dev = dev_get_drvdata(&ofdev->dev); + struct sja1000_priv *priv = netdev_priv(dev); + struct device_node *np = ofdev->node; + struct resource res; + + dev_set_drvdata(&ofdev->dev, NULL); + + unregister_sja1000dev(dev); + free_sja1000dev(dev); + iounmap(priv->reg_base); + irq_dispose_mapping(dev->irq); + + of_address_to_resource(np, 0, &res); + release_mem_region(res.start, resource_size(&res)); + + return 0; +} + +static int __devinit sja1000_ofp_probe(struct of_device *ofdev, + const struct of_device_id *id) +{ + struct device_node *np = ofdev->node; + struct net_device *dev; + struct sja1000_priv *priv; + struct resource res; + const u32 *prop; + int err, irq, res_size, prop_size; + void __iomem *base; + + err = of_address_to_resource(np, 0, &res); + if (err) { + dev_err(&ofdev->dev, "invalid address\n"); + return err; + } + + res_size = resource_size(&res); + + if (!request_mem_region(res.start, res_size, DRV_NAME)) { + dev_err(&ofdev->dev, "couldn't request %#x..%#x\n", + res.start, res.end); + return -EBUSY; + } + + base = ioremap_nocache(res.start, res_size); + if (!base) { + dev_err(&ofdev->dev, "couldn't ioremap %#x..%#x\n", + res.start, res.end); + err = -ENOMEM; + goto exit_release_mem; + } + + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + dev_err(&ofdev->dev, "no irq found\n"); + err = -ENODEV; + goto exit_unmap_mem; + } + + dev = alloc_sja1000dev(0); + if (!dev) { + err = -ENOMEM; + goto exit_dispose_irq; + } + + priv = netdev_priv(dev); + + priv->read_reg = sja1000_ofp_read_reg; + priv->write_reg = sja1000_ofp_write_reg; + + prop = of_get_property(np, "nxp,external-clock-frequency", &prop_size); + if (prop && (prop_size == sizeof(u32))) + priv->can.clock.freq = *prop / 2; + else + priv->can.clock.freq = SJA1000_OFP_CAN_CLOCK; /* default */ + + prop = of_get_property(np, "nxp,tx-output-mode", &prop_size); + if (prop && (prop_size == sizeof(u32))) + priv->ocr |= *prop & OCR_MODE_MASK; + else + priv->ocr |= OCR_MODE_NORMAL; /* default */ + + prop = of_get_property(np, "nxp,tx-output-config", &prop_size); + if (prop && (prop_size == sizeof(u32))) + priv->ocr |= (*prop << OCR_TX_SHIFT) & OCR_TX_MASK; + else + priv->ocr |= OCR_TX0_PULLDOWN; /* default */ + + prop = of_get_property(np, "nxp,clock-out-frequency", &prop_size); + if (prop && (prop_size == sizeof(u32)) && *prop) { + u32 divider = priv->can.clock.freq * 2 / *prop; + + if (divider > 1) + priv->cdr |= divider / 2 - 1; + else + priv->cdr |= CDR_CLKOUT_MASK; + } else { + priv->cdr |= CDR_CLK_OFF; /* default */ + } + + prop = of_get_property(np, "nxp,no-comparator-bypass", NULL); + if (!prop) + priv->cdr |= CDR_CBP; /* default */ + + priv->irq_flags = IRQF_SHARED; + priv->reg_base = base; + + dev->irq = irq; + + dev_info(&ofdev->dev, + "reg_base=0x%p irq=%d clock=%d ocr=0x%02x cdr=0x%02x\n", + priv->reg_base, dev->irq, priv->can.clock.freq, + priv->ocr, priv->cdr); + + dev_set_drvdata(&ofdev->dev, dev); + SET_NETDEV_DEV(dev, &ofdev->dev); + + err = register_sja1000dev(dev); + if (err) { + dev_err(&ofdev->dev, "registering %s failed (err=%d)\n", + DRV_NAME, err); + goto exit_free_sja1000; + } + + return 0; + +exit_free_sja1000: + free_sja1000dev(dev); +exit_dispose_irq: + irq_dispose_mapping(irq); +exit_unmap_mem: + iounmap(base); +exit_release_mem: + release_mem_region(res.start, res_size); + + return err; +} + +static struct of_device_id __devinitdata sja1000_ofp_table[] = { + {.compatible = "nxp,sja1000"}, + {}, +}; + +static struct of_platform_driver sja1000_ofp_driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + .probe = sja1000_ofp_probe, + .remove = __devexit_p(sja1000_ofp_remove), + .match_table = sja1000_ofp_table, +}; + +static int __init sja1000_ofp_init(void) +{ + return of_register_platform_driver(&sja1000_ofp_driver); +} +module_init(sja1000_ofp_init); + +static void __exit sja1000_ofp_exit(void) +{ + return of_unregister_platform_driver(&sja1000_ofp_driver); +}; +module_exit(sja1000_ofp_exit); diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 37966e630ff..01ee2aeb048 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -13,6 +13,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 +#define OCR_MODE_MASK 0x07 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 @@ -21,6 +22,8 @@ #define OCR_TX1_PULLDOWN 0x40 #define OCR_TX1_PULLUP 0x80 #define OCR_TX1_PUSHPULL 0xc0 +#define OCR_TX_MASK 0xfc +#define OCR_TX_SHIFT 2 struct sja1000_platform_data { u32 clock; /* CAN bus oscillator frequency in Hz */ -- cgit v1.2.3-70-g09d2 From 56d417b12e57dfe11c9b7ba4bea3882c62a55815 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 1 Jun 2009 03:07:33 -0700 Subject: IPv6: Add 'autoconf' and 'disable_ipv6' module parameters Add 'autoconf' and 'disable_ipv6' parameters to the IPv6 module. The first controls if IPv6 addresses are autoconfigured from prefixes received in Router Advertisements. The IPv6 loopback (::1) and link-local addresses are still configured. The second controls if IPv6 addresses are desired at all. No IPv6 addresses will be added to any interfaces. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++ Documentation/networking/ipv6.txt | 37 +++++++++++++++ include/linux/ipv6.h | 6 +++ net/ipv6/addrconf.c | 83 ++++++++++++++++++++++++++++++++-- net/ipv6/af_inet6.c | 22 +++++++-- 5 files changed, 145 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3ffd233c369..8be76235fe6 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1057,6 +1057,13 @@ disable_ipv6 - BOOLEAN address. Default: FALSE (enable IPv6 operation) + When this value is changed from 1 to 0 (IPv6 is being enabled), + it will dynamically create a link-local address on the given + interface and start Duplicate Address Detection, if necessary. + + When this value is changed from 0 to 1 (IPv6 is being disabled), + it will dynamically delete all address on the given interface. + accept_dad - INTEGER Whether to accept DAD (Duplicate Address Detection). 0: Disable DAD diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt index 268e5c103dd..9fd7e21296c 100644 --- a/Documentation/networking/ipv6.txt +++ b/Documentation/networking/ipv6.txt @@ -33,3 +33,40 @@ disable A reboot is required to enable IPv6. +autoconf + + Specifies whether to enable IPv6 address autoconfiguration + on all interfaces. This might be used when one does not wish + for addresses to be automatically generated from prefixes + received in Router Advertisements. + + The possible values and their effects are: + + 0 + IPv6 address autoconfiguration is disabled on all interfaces. + + Only the IPv6 loopback address (::1) and link-local addresses + will be added to interfaces. + + 1 + IPv6 address autoconfiguration is enabled on all interfaces. + + This is the default value. + +disable_ipv6 + + Specifies whether to disable IPv6 on all interfaces. + This might be used when no IPv6 addresses are desired. + + The possible values and their effects are: + + 0 + IPv6 is enabled on all interfaces. + + This is the default value. + + 1 + IPv6 is disabled on all interfaces. + + No IPv6 addresses will be added to interfaces. + diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 476d9464ac8..c662efa6828 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -169,6 +169,12 @@ struct ipv6_devconf { __s32 accept_dad; void *sysctl; }; + +struct ipv6_params { + __s32 disable_ipv6; + __s32 autoconf; +}; +extern struct ipv6_params ipv6_defaults; #endif /* index values for the variables in ipv6_devconf */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 31938e5fb22..c3488372f12 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -591,7 +591,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; - struct net *net = dev_net(idev->dev); int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -608,7 +607,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out2; } - if (idev->cnf.disable_ipv6 || net->ipv6.devconf_all->disable_ipv6) { + if (idev->cnf.disable_ipv6) { err = -EACCES; goto out2; } @@ -1752,6 +1751,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 prefered_lft; int addr_type; struct inet6_dev *in6_dev; + struct net *net = dev_net(dev); pinfo = (struct prefix_info *) opt; @@ -1809,7 +1809,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (addrconf_finite_timeout(rt_expires)) rt_expires *= HZ; - rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, + rt = rt6_lookup(net, &pinfo->prefix, NULL, dev->ifindex, 1); if (rt && addrconf_is_prefix_route(rt)) { @@ -1846,7 +1846,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) struct inet6_ifaddr * ifp; struct in6_addr addr; int create = 0, update_lft = 0; - struct net *net = dev_net(dev); if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -3988,6 +3987,75 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, return addrconf_fixup_forwarding(table, valp, val); } +static void dev_disable_change(struct inet6_dev *idev) +{ + if (!idev || !idev->dev) + return; + + if (idev->cnf.disable_ipv6) + addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + else + addrconf_notify(NULL, NETDEV_UP, idev->dev); +} + +static void addrconf_disable_change(struct net *net, __s32 newf) +{ + struct net_device *dev; + struct inet6_dev *idev; + + read_lock(&dev_base_lock); + for_each_netdev(net, dev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + int changed = (!idev->cnf.disable_ipv6) ^ (!newf); + idev->cnf.disable_ipv6 = newf; + if (changed) + dev_disable_change(idev); + } + rcu_read_unlock(); + } + read_unlock(&dev_base_lock); +} + +static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) +{ + struct net *net; + + net = (struct net *)table->extra2; + + if (p == &net->ipv6.devconf_dflt->disable_ipv6) + return 0; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (p == &net->ipv6.devconf_all->disable_ipv6) { + __s32 newf = net->ipv6.devconf_all->disable_ipv6; + net->ipv6.devconf_dflt->disable_ipv6 = newf; + addrconf_disable_change(net, newf); + } else if ((!*p) ^ (!old)) + dev_disable_change((struct inet6_dev *)table->extra1); + + rtnl_unlock(); + return 0; +} + +static +int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = ctl->data; + int val = *valp; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write) + ret = addrconf_disable_ipv6(ctl, valp, val); + return ret; +} + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -4225,7 +4293,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_disable, + .strategy = sysctl_intvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -4346,6 +4415,10 @@ static int addrconf_init_net(struct net *net) dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); if (dflt == NULL) goto err_alloc_dflt; + } else { + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; } net->ipv6.devconf_all = all; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b6215be0963..85b3d0036af 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -72,9 +72,21 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); -static int disable_ipv6 = 0; -module_param_named(disable, disable_ipv6, int, 0); -MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional"); +struct ipv6_params ipv6_defaults = { + .disable_ipv6 = 0, + .autoconf = 1, +}; + +static int disable_ipv6_mod = 0; + +module_param_named(disable, disable_ipv6_mod, int, 0444); +MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); + +module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444); +MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); + +module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); +MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { @@ -1038,7 +1050,7 @@ static int __init inet6_init(void) for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); - if (disable_ipv6) { + if (disable_ipv6_mod) { printk(KERN_INFO "IPv6: Loaded, but administratively disabled, " "reboot required to enable\n"); @@ -1227,7 +1239,7 @@ module_init(inet6_init); static void __exit inet6_exit(void) { - if (disable_ipv6) + if (disable_ipv6_mod) return; /* First of all disallow new sockets creation. */ -- cgit v1.2.3-70-g09d2 From 22a4f650d686eeaac3629dae1c4294381485efdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 1 Jun 2009 10:13:37 +0200 Subject: perf_counter: Tidy up style details - whitespace fixlets - make local variable definitions more consistent [ Impact: cleanup ] Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- kernel/perf_counter.c | 39 +++++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 81ec79c9f19..0e57d8cc5a3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -562,7 +562,7 @@ struct perf_cpu_context { * * task, softirq, irq, nmi context */ - int recursion[4]; + int recursion[4]; }; #ifdef CONFIG_PERF_COUNTERS diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ff8b4636f84..df319c48c52 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -16,8 +16,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include @@ -65,7 +65,9 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } -int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, + +int __weak +hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu) { @@ -127,8 +129,8 @@ static void put_ctx(struct perf_counter_context *ctx) * This has to cope with with the fact that until it is locked, * the context could get moved to another task. */ -static struct perf_counter_context *perf_lock_task_context( - struct task_struct *task, unsigned long *flags) +static struct perf_counter_context * +perf_lock_task_context(struct task_struct *task, unsigned long *flags) { struct perf_counter_context *ctx; @@ -1330,9 +1332,9 @@ __perf_counter_init_context(struct perf_counter_context *ctx, static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; struct perf_counter_context *parent_ctx; + struct perf_counter_context *ctx; + struct perf_cpu_context *cpuctx; struct task_struct *task; unsigned long flags; int err; @@ -1664,8 +1666,8 @@ int perf_counter_task_disable(void) */ void perf_counter_update_userpage(struct perf_counter *counter) { - struct perf_mmap_data *data; struct perf_counter_mmap_page *userpg; + struct perf_mmap_data *data; rcu_read_lock(); data = rcu_dereference(counter->data); @@ -1769,10 +1771,11 @@ fail: static void __perf_mmap_data_free(struct rcu_head *rcu_head) { - struct perf_mmap_data *data = container_of(rcu_head, - struct perf_mmap_data, rcu_head); + struct perf_mmap_data *data; int i; + data = container_of(rcu_head, struct perf_mmap_data, rcu_head); + free_page((unsigned long)data->user_page); for (i = 0; i < data->nr_pages; i++) free_page((unsigned long)data->data_pages[i]); @@ -1801,8 +1804,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) struct perf_counter *counter = vma->vm_file->private_data; WARN_ON_ONCE(counter->ctx->parent_ctx); - if (atomic_dec_and_mutex_lock(&counter->mmap_count, - &counter->mmap_mutex)) { + if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { struct user_struct *user = current_user(); atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); @@ -1821,11 +1823,11 @@ static struct vm_operations_struct perf_mmap_vmops = { static int perf_mmap(struct file *file, struct vm_area_struct *vma) { struct perf_counter *counter = file->private_data; + unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + unsigned long locked, lock_limit; unsigned long vma_size; unsigned long nr_pages; - unsigned long user_locked, user_lock_limit; - unsigned long locked, lock_limit; long user_extra, extra; int ret = 0; @@ -1900,8 +1902,8 @@ unlock: static int perf_fasync(int fd, struct file *filp, int on) { - struct perf_counter *counter = filp->private_data; struct inode *inode = filp->f_path.dentry->d_inode; + struct perf_counter *counter = filp->private_data; int retval; mutex_lock(&inode->i_mutex); @@ -2412,8 +2414,8 @@ static void perf_counter_output(struct perf_counter *counter, */ struct perf_comm_event { - struct task_struct *task; - char *comm; + struct task_struct *task; + char *comm; int comm_size; struct { @@ -2932,6 +2934,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { int neg = atomic64_add_negative(nr, &counter->hw.count); + if (counter->hw.irq_period && !neg) perf_swcounter_overflow(counter, nmi, regs, addr); } @@ -3526,7 +3529,7 @@ inherit_counter(struct perf_counter *parent_counter, /* * Make the child state follow the state of the parent counter, * not its hw_event.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en,dis}able_family. + * so we won't race with perf_counter_{en, dis}able_family. */ if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) child_counter->state = PERF_COUNTER_STATE_INACTIVE; -- cgit v1.2.3-70-g09d2 From fb39125fd79a25c5002f3b45cf4c80e3fa6b961b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 17 Apr 2009 15:15:51 +0800 Subject: ftrace, workqueuetrace: make workqueue tracepoints use TRACE_EVENT macro v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format introduced by Steven Rostedt: consolidate trace and trace_event headers v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap the work addr v1: zhaolei@cn.fujitsu.com: Make workqueue tracepoints use TRACE_EVENT macro TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to the tracepoints: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Then, this patch converts DEFINE_TRACE to TRACE_EVENT in workqueue related tracepoints. [ Impact: expand workqueue tracer to events tracing ] Signed-off-by: Zhao Lei Cc: Steven Rostedt Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Andrew Morton Signed-off-by: KOSAKI Motohiro Signed-off-by: Frederic Weisbecker --- include/trace/events/workqueue.h | 100 +++++++++++++++++++++++++++++++++++++++ include/trace/workqueue.h | 25 ---------- kernel/trace/trace_workqueue.c | 2 +- kernel/workqueue.c | 11 +---- 4 files changed, 103 insertions(+), 35 deletions(-) create mode 100644 include/trace/events/workqueue.h delete mode 100644 include/trace/workqueue.h (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 00000000000..035f1bff288 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,100 @@ +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +TRACE_EVENT(workqueue_insertion, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +TRACE_EVENT(workqueue_execution, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +/* Trace the creation of one workqueue thread on a cpu */ +TRACE_EVENT(workqueue_creation, + + TP_PROTO(struct task_struct *wq_thread, int cpu), + + TP_ARGS(wq_thread, cpu), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(int, cpu) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->cpu = cpu; + ), + + TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm, + __entry->thread_pid, __entry->cpu) +); + +TRACE_EVENT(workqueue_destruction, + + TP_PROTO(struct task_struct *wq_thread), + + TP_ARGS(wq_thread), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + ), + + TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid) +); + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h deleted file mode 100644 index 7626523deeb..00000000000 --- a/include/trace/workqueue.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __TRACE_WORKQUEUE_H -#define __TRACE_WORKQUEUE_H - -#include -#include -#include - -DECLARE_TRACE(workqueue_insertion, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -DECLARE_TRACE(workqueue_execution, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -/* Trace the creation of one workqueue thread on a cpu */ -DECLARE_TRACE(workqueue_creation, - TP_PROTO(struct task_struct *wq_thread, int cpu), - TP_ARGS(wq_thread, cpu)); - -DECLARE_TRACE(workqueue_destruction, - TP_PROTO(struct task_struct *wq_thread), - TP_ARGS(wq_thread)); - -#endif /* __TRACE_WORKQUEUE_H */ diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 984b9175c13..cfe56d31d85 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -6,7 +6,7 @@ */ -#include +#include #include #include #include "trace_stat.h" diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f71fb2a0895..0668795d881 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -33,7 +33,8 @@ #include #include #include -#include +#define CREATE_TRACE_POINTS +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } -DEFINE_TRACE(workqueue_insertion); - static void insert_work(struct cpu_workqueue_struct *cwq, struct work_struct *work, struct list_head *head) { @@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); -DEFINE_TRACE(workqueue_execution); - static void run_workqueue(struct cpu_workqueue_struct *cwq) { spin_lock_irq(&cwq->lock); @@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) return cwq; } -DEFINE_TRACE(workqueue_creation); - static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; @@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } EXPORT_SYMBOL_GPL(__create_workqueue_key); -DEFINE_TRACE(workqueue_destruction); - static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) { /* -- cgit v1.2.3-70-g09d2 From 6e25db44a7ad7eb380f4ec774ec00a8fcddea112 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 29 May 2009 11:24:59 +0800 Subject: tracing/events: fix a typo in __string() format output "tsize" should be "\tsize". Also remove the space before "__str_loc". Before: # cat tracing/events/irq/irq_handler_entry/format ... field:int irq; offset:12; size:4; field: __str_loc name; offset:16;tsize:2; ... After: # cat tracing/events/irq/irq_handler_entry/format ... field:int irq; offset:12; size:4; field:__str_loc name; offset:16; size:2; ... [ Impact: standardize __string field description in events format file ] Signed-off-by: Li Zefan Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b4ec83ae711..9276ec4f34d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -209,8 +209,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) \ - ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \ - "offset:%u;tsize:%u;\n", \ + ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __str_loc_##item), \ (unsigned int)sizeof(field.__str_loc_##item)); \ -- cgit v1.2.3-70-g09d2 From a9c1c3abe1160a5632e48c929b02b740556bf423 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 1 Jun 2009 15:35:13 +0800 Subject: tracing/events: put TP_fast_assign into braces Currently TP_fast_assign has a limitation that we can't define local variables in it. Here's one use case when we introduce __dynamic_array(): TP_fast_assign( type *p = __get_dynamic_array(item); foo(p); bar(p); ), [ Impact: allow defining local variables in TP_fast_assign ] Signed-off-by: Li Zefan LKML-Reference: <4A2384B1.90100@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 9276ec4f34d..ee926822244 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -471,7 +471,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - assign; \ + { assign; } \ \ if (!filter_current_check_discard(event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ -- cgit v1.2.3-70-g09d2 From 7fcb7c472f455d1711eb5a7633204dba8800a6d6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 1 Jun 2009 15:35:46 +0800 Subject: tracing/events: introduce __dynamic_array() __string() is limited: - it's a char array, but we may want to define array with other types - a source string should be available, but we may just know the string size We introduce __dynamic_array() to break those limitations, and __string() becomes a wrapper of it. As a side effect, now __get_str() can be used in TP_fast_assign but not only TP_print. Take XFS for example, we have the string length in the dirent, but the string itself is not NULL-terminated, so __dynamic_array() can be used: TRACE_EVENT(xfs_dir2, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), TP_STRUCT__entry( __field(int, namelen) __dynamic_array(char, name, args->namelen + 1) ... ), TP_fast_assign( char *name = __get_str(name); if (args->namelen) memcpy(name, args->name, args->namelen); name[args->namelen] = '\0'; __entry->namelen = args->namelen; ), TP_printk("name %.*s namelen %d", __entry->namelen ? __get_str(name) : NULL __entry->namelen) ); [ Impact: allow defining dynamic size arrays ] Signed-off-by: Li Zefan LKML-Reference: <4A2384D2.3080403@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 122 ++++++++++++++++++++++++++----------- kernel/trace/trace_events_filter.c | 6 +- 2 files changed, 91 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index ee926822244..b5478dab579 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,14 +18,17 @@ #include +#undef __field +#define __field(type, item) type item; + #undef __array #define __array(type, item, len) type item[len]; -#undef __field -#define __field(type, item) type item; +#undef __dynamic_array +#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; #undef __string -#define __string(item, src) unsigned short __str_loc_##item; +#define __string(item, src) __dynamic_array(char, item, -1) #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -35,7 +38,7 @@ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __str_data[0]; \ + char __data[0]; \ }; \ static struct ftrace_event_call event_##name @@ -47,30 +50,31 @@ * * Include the following: * - * struct ftrace_str_offsets_ { - * int ; - * int ; + * struct ftrace_data_offsets_ { + * int ; + * int ; * [...] * }; * - * The __string() macro will create each int , this is to - * keep the offset of each string from the beggining of the event - * once we perform the strlen() of the src strings. - * + * The __dynamic_array() macro will create each int , this is + * to keep the offset of each array from the beginning of the event. */ +#undef __field +#define __field(type, item); + #undef __array #define __array(type, item, len) -#undef __field -#define __field(type, item); +#undef __dynamic_array +#define __dynamic_array(type, item, len) int item; #undef __string -#define __string(item, src) int item; +#define __string(item, src) __dynamic_array(char, item, -1) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - struct ftrace_str_offsets_##call { \ + struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -119,8 +123,12 @@ #undef TP_printk #define TP_printk(fmt, args...) fmt "\n", args +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + __entry->__data_loc_##field) + #undef __get_str -#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) +#define __get_str(field) (char *)__get_dynamic_array(field) #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ @@ -207,16 +215,19 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ if (!ret) \ return 0; -#undef __string -#define __string(item, src) \ - ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t" \ +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ - __str_loc_##item), \ - (unsigned int)sizeof(field.__str_loc_##item)); \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ if (!ret) \ return 0; +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + #undef __entry #define __entry REC @@ -260,11 +271,14 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0); + #undef __string -#define __string(item, src) \ - ret = trace_define_field(event_call, "__str_loc", #item, \ - offsetof(typeof(field), __str_loc_##item), \ - sizeof(field.__str_loc_##item), 0); +#define __string(item, src) __dynamic_array(char, item, -1) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ @@ -288,6 +302,43 @@ ftrace_define_fields_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * remember the offset of each array from the beginning of the event. + */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_size += (len) * sizeof(type); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static inline int ftrace_get_offsets_##call( \ + struct ftrace_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + struct ftrace_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 4 of the trace events. * @@ -432,15 +483,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #undef __array #define __array(type, item, len) +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + #undef __string -#define __string(item, src) \ - __str_offsets.item = __str_size + \ - offsetof(typeof(*entry), __str_data); \ - __str_size += strlen(src) + 1; +#define __string(item, src) __dynamic_array(char, item, -1) \ #undef __assign_str #define __assign_str(dst, src) \ - __entry->__str_loc_##dst = __str_offsets.dst; \ strcpy(__get_str(dst), src); #undef TRACE_EVENT @@ -451,26 +502,29 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ - struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ - int __str_size = 0; \ + int __data_size; \ int pc; \ \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ - tstruct; \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call) + __str_size,\ + sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ return; \ entry = ring_buffer_event_data(event); \ \ + \ + tstruct \ + \ { assign; } \ \ if (!filter_current_check_discard(event_call, entry, event)) \ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index a7430b16d24..db6e54bdb59 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -478,12 +478,12 @@ enum { static int is_string_field(const char *type) { + if (strstr(type, "__data_loc") && strstr(type, "char")) + return FILTER_DYN_STRING; + if (strchr(type, '[') && strstr(type, "char")) return FILTER_STATIC_STRING; - if (!strcmp(type, "__str_loc")) - return FILTER_DYN_STRING; - return 0; } -- cgit v1.2.3-70-g09d2 From 1d080d6c3141623c92caaebe20e847cb99ccbb60 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 12:20:40 -0400 Subject: tracing: remove redundant SOFTIRQ from softirq event traces After converting the softirq tracer to use te flags options, this caused a regression with the name. Since the flag was used directly it was printed out (i.e. HRTIMER_SOFTIRQ). This patch only shows the softirq name without the SOFTIRQ part. [ Impact: fix regression of output from softirq events ] Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 683fb36a994..b0c7ede55eb 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -7,18 +7,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -#define softirq_name(sirq) { sirq, #sirq } -#define show_softirq_name(val) \ - __print_symbolic(val, \ - softirq_name(HI_SOFTIRQ), \ - softirq_name(TIMER_SOFTIRQ), \ - softirq_name(NET_TX_SOFTIRQ), \ - softirq_name(NET_RX_SOFTIRQ), \ - softirq_name(BLOCK_SOFTIRQ), \ - softirq_name(TASKLET_SOFTIRQ), \ - softirq_name(SCHED_SOFTIRQ), \ - softirq_name(HRTIMER_SOFTIRQ), \ - softirq_name(RCU_SOFTIRQ)) +#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ + softirq_name(RCU)) /** * irq_handler_entry - called immediately before the irq action handler -- cgit v1.2.3-70-g09d2 From 112f38a7e36e9d688b389507136bf3af3e6d159b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 15:16:05 -0400 Subject: tracing: make trace pipe recognize latency format flag The trace_pipe did not recognize the latency format flag and would produce different output than the trace file. The problem was partly due that the trace flags in the iterator was not set as well as the trace_pipe zeros out part of the iterator (including the flags) to be able to use the same routines as the trace file. trace_flags of the iterator should not cause any problems when not zeroed out by for trace_pipe. Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bbf40f624fc..5c093ffc655 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -51,6 +51,7 @@ struct trace_iterator { int cpu_file; struct mutex mutex; struct ring_buffer_iter *buffer_iter[NR_CPUS]; + unsigned long iter_flags; /* The below is zeroed out in pipe_read */ struct trace_seq seq; @@ -58,7 +59,6 @@ struct trace_iterator { int cpu; u64 ts; - unsigned long iter_flags; loff_t pos; long idx; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3a8a87d7e9..cae34c69752 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2826,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); + if (trace_flags & TRACE_ITER_LATENCY_FMT) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); -- cgit v1.2.3-70-g09d2 From f771bef98004d9d141b085d987a77d06669d4f4f Mon Sep 17 00:00:00 2001 From: Nivedita Singhvi Date: Thu, 28 May 2009 07:00:46 +0000 Subject: ipv4: New multicast-all socket option After some discussion offline with Christoph Lameter and David Stevens regarding multicast behaviour in Linux, I'm submitting a slightly modified patch from the one Christoph submitted earlier. This patch provides a new socket option IP_MULTICAST_ALL. In this case, default behaviour is _unchanged_ from the current Linux standard. The socket option is set by default to provide original behaviour. Sockets wishing to receive data only from multicast groups they join explicitly will need to clear this socket option. Signed-off-by: Nivedita Singhvi Signed-off-by: Christoph Lameter Acked-by: David Stevens Signed-off-by: David S. Miller --- include/linux/in.h | 1 + include/net/inet_sock.h | 3 ++- net/ipv4/af_inet.c | 1 + net/ipv4/igmp.c | 2 +- net/ipv4/ip_sockglue.c | 11 +++++++++++ 5 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index d60122a3a08..cf196da04ec 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -107,6 +107,7 @@ struct in_addr { #define MCAST_JOIN_SOURCE_GROUP 46 #define MCAST_LEAVE_SOURCE_GROUP 47 #define MCAST_MSFILTER 48 +#define IP_MULTICAST_ALL 49 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index de0ecc71cf0..20a6957af87 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,7 +130,8 @@ struct inet_sock { freebind:1, hdrincl:1, mc_loop:1, - transparent:1; + transparent:1, + mc_all:1; int mc_index; __be32 mc_addr; struct ip_mc_socklist *mc_list; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5abee4c9744..d8736217858 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -375,6 +375,7 @@ lookup_protocol: inet->uc_ttl = -1; inet->mc_loop = 1; inet->mc_ttl = 1; + inet->mc_all = 1; inet->mc_index = 0; inet->mc_list = NULL; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9eb6219af61..e6058a50379 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2196,7 +2196,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) break; } if (!pmc) - return 1; + return inet->mc_all; psl = pmc->sflist; if (!psl) return pmc->sfmode == MCAST_EXCLUDE; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 21b0187123d..cb49936856e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -453,6 +453,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { @@ -898,6 +899,13 @@ mc_msf_out: kfree(gsf); break; } + case IP_MULTICAST_ALL: + if (optlen < 1) + goto e_inval; + if (val != 0 && val != 1) + goto e_inval; + inet->mc_all = val; + break; case IP_ROUTER_ALERT: err = ip_ra_control(sk, val ? 1 : 0, NULL); break; @@ -1151,6 +1159,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); return err; } + case IP_MULTICAST_ALL: + val = inet->mc_all; + break; case IP_PKTOPTIONS: { struct msghdr msg; -- cgit v1.2.3-70-g09d2 From 10a8ebbb08c4b08292598947bbe534e04d6ee705 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 2 Jun 2009 12:02:38 +0200 Subject: ALSA: Core - add snd_card_set_id() function Introduce snd_card_set_id() function to allow lowlevel drivers to set default identification name for card slot. The function checks also for identification name collisions and tries to create unique name. Also, the snd_card_create() function is simplified, because this new function is used. As bonus, proper name collision checks are evaluated at the card create time. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/core.h | 1 + sound/core/init.c | 62 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 3dea79829ac..0e8ae10155a 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -313,6 +313,7 @@ struct snd_card *snd_card_new(int idx, const char *id, int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); +void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); int snd_card_info_done(void); diff --git a/sound/core/init.c b/sound/core/init.c index fd56afe846e..6557dd85e19 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -152,15 +152,8 @@ int snd_card_create(int idx, const char *xid, card = kzalloc(sizeof(*card) + extra_size, GFP_KERNEL); if (!card) return -ENOMEM; - if (xid) { - if (!snd_info_check_reserved_words(xid)) { - snd_printk(KERN_ERR - "given id string '%s' is reserved.\n", xid); - err = -EBUSY; - goto __error; - } - strlcpy(card->id, xid, sizeof(card->id)); - } + if (xid) + snd_card_set_id(card, xid); err = 0; mutex_lock(&snd_card_mutex); if (idx < 0) { @@ -483,22 +476,39 @@ int snd_card_free(struct snd_card *card) EXPORT_SYMBOL(snd_card_free); -static void choose_default_id(struct snd_card *card) +/** + * snd_card_set_id - set card identification name + * @card: soundcard structure + * @nid: new identification string + * + * This function sets the card identification and checks for name + * collisions. + */ +void snd_card_set_id(struct snd_card *card, const char *nid) { int i, len, idx_flag = 0, loops = SNDRV_CARDS; - char *id, *spos; + const char *spos, *src; + char *id; - id = spos = card->shortname; - while (*id != '\0') { - if (*id == ' ') - spos = id + 1; - id++; + /* check if user specified own card->id */ + if (card->id[0] != '\0') + return; + if (nid == NULL) { + id = card->shortname; + spos = src = id; + while (*id != '\0') { + if (*id == ' ') + spos = id + 1; + id++; + } + } else { + spos = src = nid; } id = card->id; while (*spos != '\0' && !isalnum(*spos)) spos++; if (isdigit(*spos)) - *id++ = isalpha(card->shortname[0]) ? card->shortname[0] : 'D'; + *id++ = isalpha(src[0]) ? src[0] : 'D'; while (*spos != '\0' && (size_t)(id - card->id) < sizeof(card->id) - 1) { if (isalnum(*spos)) *id++ = *spos; @@ -513,16 +523,20 @@ static void choose_default_id(struct snd_card *card) while (1) { if (loops-- == 0) { - snd_printk(KERN_ERR "unable to choose default card id (%s)\n", id); + snd_printk(KERN_ERR "unable to set card id (%s)\n", id); strcpy(card->id, card->proc_root->name); return; } if (!snd_info_check_reserved_words(id)) goto __change; + mutex_lock(&snd_card_mutex); for (i = 0; i < snd_ecards_limit; i++) { - if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) + if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) { + mutex_unlock(&snd_card_mutex); goto __change; + } } + mutex_unlock(&snd_card_mutex); break; __change: @@ -539,14 +553,16 @@ static void choose_default_id(struct snd_card *card) spos = id + len - 2; if ((size_t)len <= sizeof(card->id) - 2) spos++; - *spos++ = '_'; - *spos++ = '1'; - *spos++ = '\0'; + *(char *)spos++ = '_'; + *(char *)spos++ = '1'; + *(char *)spos++ = '\0'; idx_flag++; } } } +EXPORT_SYMBOL(snd_card_set_id); + #ifndef CONFIG_SYSFS_DEPRECATED static ssize_t card_id_show_attr(struct device *dev, @@ -641,7 +657,7 @@ int snd_card_register(struct snd_card *card) return 0; } if (card->id[0] == '\0') - choose_default_id(card); + snd_card_set_id(card, NULL); snd_cards[card->number] = card; mutex_unlock(&snd_card_mutex); init_info_for_card(card); -- cgit v1.2.3-70-g09d2 From 0e0ee1cc33de8f0cc603269b354085dee340afa0 Mon Sep 17 00:00:00 2001 From: Dmitry Pervushin Date: Wed, 29 Apr 2009 19:29:38 +0400 Subject: UBI: add notification API UBI volume notifications are intended to create the API to get clients notified about volume creation/deletion, renaming and re-sizing. A client can subscribe to these notifications using 'ubi_volume_register()' and cancel the subscription using 'ubi_volume_unregister()'. When UBI volumes change, a blocking notifier is called. Clients also can request "added" events on all volumes that existed before client subscribed to the notifications. If we use notifications instead of calling functions like 'ubi_gluebi_xxx()', we can make the MTD emulation layer to be more flexible: build it as a separate module and load/unload it on demand. [Artem: many cleanups, rework locking, add "updated" event, provide device/volume info in notifiers] Signed-off-by: Dmitry Pervushin Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/build.c | 100 +++++++++++++++++++++++++++++++++++++++++--- drivers/mtd/ubi/cdev.c | 1 + drivers/mtd/ubi/kapi.c | 108 +++++++++++++++++++++++++++++++++++++++++------- drivers/mtd/ubi/ubi.h | 12 ++++++ drivers/mtd/ubi/vmt.c | 4 ++ include/linux/mtd/ubi.h | 37 +++++++++++++++++ 6 files changed, 241 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index d3da6668266..964a99d48bc 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -121,6 +121,94 @@ static struct device_attribute dev_bgt_enabled = static struct device_attribute dev_mtd_num = __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); +/** + * ubi_volume_notify - send a volume change notification. + * @ubi: UBI device description object + * @vol: volume description object of the changed volume + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * + * This is a helper function which notifies all subscribers about a volume + * change event (creation, removal, re-sizing, re-naming, updating). Returns + * zero in case of success and a negative error code in case of failure. + */ +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype) +{ + struct ubi_notification nt; + + ubi_do_get_device_info(ubi, &nt.di); + ubi_do_get_volume_info(ubi, vol, &nt.vi); + return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); +} + +/** + * ubi_notify_all - send a notification to all volumes. + * @ubi: UBI device description object + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * @nb: the notifier to call + * + * This function walks all volumes of UBI device @ubi and sends the @ntype + * notification for each volume. If @nb is %NULL, then all registered notifiers + * are called, otherwise only the @nb notifier is called. Returns the number of + * sent notifications. + */ +int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb) +{ + struct ubi_notification nt; + int i, count = 0; + + ubi_do_get_device_info(ubi, &nt.di); + + mutex_lock(&ubi->device_mutex); + for (i = 0; i < ubi->vtbl_slots; i++) { + /* + * Since the @ubi->device is locked, and we are not going to + * change @ubi->volumes, we do not have to lock + * @ubi->volumes_lock. + */ + if (!ubi->volumes[i]) + continue; + + ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi); + if (nb) + nb->notifier_call(nb, ntype, &nt); + else + blocking_notifier_call_chain(&ubi_notifiers, ntype, + &nt); + count += 1; + } + mutex_unlock(&ubi->device_mutex); + + return count; +} + +/** + * ubi_enumerate_volumes - send "add" notification for all existing volumes. + * @nb: the notifier to call + * + * This function walks all UBI devices and volumes and sends the + * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all + * registered notifiers are called, otherwise only the @nb notifier is called. + * Returns the number of sent notifications. + */ +int ubi_enumerate_volumes(struct notifier_block *nb) +{ + int i, count = 0; + + /* + * Since the @ubi_devices_mutex is locked, and we are not going to + * change @ubi_devices, we do not have to lock @ubi_devices_lock. + */ + for (i = 0; i < UBI_MAX_DEVICES; i++) { + struct ubi_device *ubi = ubi_devices[i]; + + if (!ubi) + continue; + count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb); + } + + return count; +} + /** * ubi_get_device - get UBI device. * @ubi_num: UBI device number @@ -891,6 +979,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) spin_unlock(&ubi->wl_lock); ubi_devices[ubi_num] = ubi; + ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; out_uif: @@ -933,13 +1022,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return -EINVAL; - spin_lock(&ubi_devices_lock); - ubi = ubi_devices[ubi_num]; - if (!ubi) { - spin_unlock(&ubi_devices_lock); + ubi = ubi_get_device(ubi_num); + if (!ubi) return -EINVAL; - } + spin_lock(&ubi_devices_lock); + put_device(&ubi->dev); + ubi->ref_count -= 1; if (ubi->ref_count) { if (!anyway) { spin_unlock(&ubi_devices_lock); @@ -953,6 +1042,7 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) spin_unlock(&ubi_devices_lock); ubi_assert(ubi_num == ubi->ubi_num); + ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL); dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); /* diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 9a2b217941f..631983615f1 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -396,6 +396,7 @@ static ssize_t vol_cdev_write(struct file *file, const char __user *buf, } vol->checked = 1; ubi_gluebi_updated(vol); + ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); revoke_exclusive(desc, UBI_READWRITE); } diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 2675207c5fe..88a72e9c8be 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -25,6 +25,24 @@ #include #include "ubi.h" +/** + * ubi_do_get_device_info - get information about UBI device. + * @ubi: UBI device description object + * @di: the information is stored here + * + * This function is the same as 'ubi_get_device_info()', but it assumes the UBI + * device is locked and cannot disappear. + */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) +{ + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->min_io_size = ubi->min_io_size; + di->ro_mode = ubi->ro_mode; + di->cdev = ubi->cdev.dev; +} +EXPORT_SYMBOL_GPL(ubi_do_get_device_info); + /** * ubi_get_device_info - get information about UBI device. * @ubi_num: UBI device number @@ -39,33 +57,24 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return -EINVAL; - ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - - di->ubi_num = ubi->ubi_num; - di->leb_size = ubi->leb_size; - di->min_io_size = ubi->min_io_size; - di->ro_mode = ubi->ro_mode; - di->cdev = ubi->cdev.dev; - + ubi_do_get_device_info(ubi, di); ubi_put_device(ubi); return 0; } EXPORT_SYMBOL_GPL(ubi_get_device_info); /** - * ubi_get_volume_info - get information about UBI volume. - * @desc: volume descriptor + * ubi_do_get_volume_info - get information about UBI volume. + * @ubi: UBI device description object + * @vol: volume description object * @vi: the information is stored here */ -void ubi_get_volume_info(struct ubi_volume_desc *desc, - struct ubi_volume_info *vi) +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi) { - const struct ubi_volume *vol = desc->vol; - const struct ubi_device *ubi = vol->ubi; - vi->vol_id = vol->vol_id; vi->ubi_num = ubi->ubi_num; vi->size = vol->reserved_pebs; @@ -79,6 +88,17 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc, vi->name = vol->name; vi->cdev = vol->cdev.dev; } + +/** + * ubi_get_volume_info - get information about UBI volume. + * @desc: volume descriptor + * @vi: the information is stored here + */ +void ubi_get_volume_info(struct ubi_volume_desc *desc, + struct ubi_volume_info *vi) +{ + ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi); +} EXPORT_SYMBOL_GPL(ubi_get_volume_info); /** @@ -561,7 +581,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) EXPORT_SYMBOL_GPL(ubi_leb_unmap); /** - * ubi_leb_map - map logical erasblock to a physical eraseblock. + * ubi_leb_map - map logical eraseblock to a physical eraseblock. * @desc: volume descriptor * @lnum: logical eraseblock number * @dtype: expected data type @@ -659,3 +679,59 @@ int ubi_sync(int ubi_num) return 0; } EXPORT_SYMBOL_GPL(ubi_sync); + +BLOCKING_NOTIFIER_HEAD(ubi_notifiers); + +/** + * ubi_register_volume_notifier - register a volume notifier. + * @nb: the notifier description object + * @ignore_existing: if non-zero, do not send "added" notification for all + * already existing volumes + * + * This function registers a volume notifier, which means that + * 'nb->notifier_call()' will be invoked when an UBI volume is created, + * removed, re-sized, re-named, or updated. The first argument of the function + * is the notification type. The second argument is pointer to a + * &struct ubi_notification object which describes the notification event. + * Using UBI API from the volume notifier is prohibited. + * + * This function returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_register_volume_notifier(struct notifier_block *nb, + int ignore_existing) +{ + int err; + + err = blocking_notifier_chain_register(&ubi_notifiers, nb); + if (err != 0) + return err; + if (ignore_existing) + return 0; + + /* + * We are going to walk all UBI devices and all volumes, and + * notify the user about existing volumes by the %UBI_VOLUME_ADDED + * event. We have to lock the @ubi_devices_mutex to make sure UBI + * devices do not disappear. + */ + mutex_lock(&ubi_devices_mutex); + ubi_enumerate_volumes(nb); + mutex_unlock(&ubi_devices_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(ubi_register_volume_notifier); + +/** + * ubi_unregister_volume_notifier - unregister the volume notifier. + * @nb: the notifier description object + * + * This function unregisters volume notifier @nm and returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_unregister_volume_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ubi_notifiers, nb); +} +EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 6d929329a8d..86e1a4e0ab0 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -38,6 +38,7 @@ #include #include #include +#include #include "ubi-media.h" #include "scan.h" @@ -483,6 +484,7 @@ extern const struct file_operations ubi_cdev_operations; extern const struct file_operations ubi_vol_cdev_operations; extern struct class *ubi_class; extern struct mutex ubi_devices_mutex; +extern struct blocking_notifier_head ubi_notifiers; /* vtbl.c */ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, @@ -575,6 +577,16 @@ struct ubi_device *ubi_get_device(int ubi_num); void ubi_put_device(struct ubi_device *ubi); struct ubi_device *ubi_get_by_major(int major); int ubi_major2num(int major); +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, + int ntype); +int ubi_notify_all(struct ubi_device *ubi, int ntype, + struct notifier_block *nb); +int ubi_enumerate_volumes(struct notifier_block *nb); + +/* kapi.c */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di); +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi); /* * ubi_rb_for_each_entry - walk an RB-tree. diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 8e8d6fae7a0..e151862a3a9 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -358,6 +358,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ubi->vol_count += 1; spin_unlock(&ubi->volumes_lock); + ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); if (paranoid_check_volumes(ubi)) dbg_err("check failed while creating volume %d", vol_id); return err; @@ -466,6 +467,7 @@ int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) ubi->vol_count -= 1; spin_unlock(&ubi->volumes_lock); + ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED); if (!no_vtbl && paranoid_check_volumes(ubi)) dbg_err("check failed while removing volume %d", vol_id); @@ -589,6 +591,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) (long long)vol->used_ebs * vol->usable_leb_size; } + ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED); if (paranoid_check_volumes(ubi)) dbg_err("check failed while re-sizing volume %d", vol_id); return err; @@ -635,6 +638,7 @@ int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) vol->name_len = re->new_name_len; memcpy(vol->name, re->new_name, re->new_name_len + 1); spin_unlock(&ubi->volumes_lock); + ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED); } } diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 6316fafe5c2..6913b71d9ab 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -132,6 +132,39 @@ struct ubi_device_info { dev_t cdev; }; +/* + * enum - volume notification types. + * @UBI_VOLUME_ADDED: volume has been added + * @UBI_VOLUME_REMOVED: start volume volume + * @UBI_VOLUME_RESIZED: volume size has been re-sized + * @UBI_VOLUME_RENAMED: volume name has been re-named + * @UBI_VOLUME_UPDATED: volume name has been updated + * + * These constants define which type of event has happened when a volume + * notification function is invoked. + */ +enum { + UBI_VOLUME_ADDED, + UBI_VOLUME_REMOVED, + UBI_VOLUME_RESIZED, + UBI_VOLUME_RENAMED, + UBI_VOLUME_UPDATED, +}; + +/* + * struct ubi_notification - UBI notification description structure. + * @di: UBI device description object + * @vi: UBI volume description object + * + * UBI notifiers are called with a pointer to an object of this type. The + * object describes the notification. Namely, it provides a description of the + * UBI device and UBI volume the notification informs about. + */ +struct ubi_notification { + struct ubi_device_info di; + struct ubi_volume_info vi; +}; + /* UBI descriptor given to users when they open UBI volumes */ struct ubi_volume_desc; @@ -141,6 +174,10 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc, struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode); struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, int mode); +int ubi_register_volume_notifier(struct notifier_block *nb, + int ignore_existing); +int ubi_unregister_volume_notifier(struct notifier_block *nb); + void ubi_close_volume(struct ubi_volume_desc *desc); int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, int len, int check); -- cgit v1.2.3-70-g09d2 From 3f731ca60afc29f5bcdb5fd2a04391466313a9ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:52:30 +1000 Subject: perf_counter: Fix cpu migration counter This fixes the cpu migration software counter to count correctly even when contexts get swapped from one task to another. Previously the cpu migration counts reported by perf stat were bogus, ranging from negative to several thousand for a single "lat_ctx 2 8 32" run. With this patch the cpu migration count reported for "lat_ctx 2 8 32" is almost always between 35 and 44. This fixes the problem by adding a call into the perf_counter code from set_task_cpu when tasks are migrated. This enables us to use the generic swcounter code (with some modifications) for the cpu migration counter. This modifies the swcounter code to allow a NULL regs pointer to be passed in to perf_swcounter_ctx_event() etc. The cpu migration counter does this because there isn't necessarily a pt_regs struct for the task available. In this case, the counter will not have interrupt capability - but the migration counter didn't have interrupt capability before, so this is no loss. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.35006.819769.416327@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++ kernel/perf_counter.c | 74 +++++++++++++------------------------------- kernel/sched.c | 1 + 3 files changed, 26 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0e57d8cc5a3..deb9acf9ad2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -615,6 +615,8 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len, extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_task_migration(struct task_struct *task, int cpu); + #define MAX_STACK_DEPTH 255 struct perf_callchain_entry { @@ -668,6 +670,8 @@ perf_counter_munmap(unsigned long addr, unsigned long len, static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } +static inline void perf_counter_task_migration(struct task_struct *task, + int cpu) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8d2653f137e..cd94cf3bf9e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2921,11 +2921,13 @@ static int perf_swcounter_match(struct perf_counter *counter, if (counter->hw_event.config != event_config) return 0; - if (counter->hw_event.exclude_user && user_mode(regs)) - return 0; + if (regs) { + if (counter->hw_event.exclude_user && user_mode(regs)) + return 0; - if (counter->hw_event.exclude_kernel && !user_mode(regs)) - return 0; + if (counter->hw_event.exclude_kernel && !user_mode(regs)) + return 0; + } return 1; } @@ -2935,7 +2937,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, { int neg = atomic64_add_negative(nr, &counter->hw.count); - if (counter->hw.irq_period && !neg) + if (counter->hw.irq_period && !neg && regs) perf_swcounter_overflow(counter, nmi, regs, addr); } @@ -3151,55 +3153,24 @@ static const struct pmu perf_ops_task_clock = { /* * Software counter: cpu migrations */ - -static inline u64 get_cpu_migrations(struct perf_counter *counter) -{ - struct task_struct *curr = counter->ctx->task; - - if (curr) - return curr->se.nr_migrations; - return cpu_nr_migrations(smp_processor_id()); -} - -static void cpu_migrations_perf_counter_update(struct perf_counter *counter) -{ - u64 prev, now; - s64 delta; - - prev = atomic64_read(&counter->hw.prev_count); - now = get_cpu_migrations(counter); - - atomic64_set(&counter->hw.prev_count, now); - - delta = now - prev; - - atomic64_add(delta, &counter->count); -} - -static void cpu_migrations_perf_counter_read(struct perf_counter *counter) +void perf_counter_task_migration(struct task_struct *task, int cpu) { - cpu_migrations_perf_counter_update(counter); -} + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx; -static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) -{ - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, - get_cpu_migrations(counter)); - return 0; -} + perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, + PERF_COUNT_CPU_MIGRATIONS, + 1, 1, NULL, 0); -static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) -{ - cpu_migrations_perf_counter_update(counter); + ctx = perf_pin_task_context(task); + if (ctx) { + perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, + PERF_COUNT_CPU_MIGRATIONS, + 1, 1, NULL, 0); + perf_unpin_context(ctx); + } } -static const struct pmu perf_ops_cpu_migrations = { - .enable = cpu_migrations_perf_counter_enable, - .disable = cpu_migrations_perf_counter_disable, - .read = cpu_migrations_perf_counter_read, -}; - #ifdef CONFIG_EVENT_PROFILE void perf_tpcounter_event(int event_id) { @@ -3272,11 +3243,8 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_PAGE_FAULTS_MIN: case PERF_COUNT_PAGE_FAULTS_MAJ: case PERF_COUNT_CONTEXT_SWITCHES: - pmu = &perf_ops_generic; - break; case PERF_COUNT_CPU_MIGRATIONS: - if (!counter->hw_event.exclude_kernel) - pmu = &perf_ops_cpu_migrations; + pmu = &perf_ops_generic; break; } diff --git a/kernel/sched.c b/kernel/sched.c index 3226cc132e9..8d43347a0c0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1977,6 +1977,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); #endif + perf_counter_task_migration(p, new_cpu); } p->se.vruntime -= old_cfsrq->min_vruntime - new_cfsrq->min_vruntime; -- cgit v1.2.3-70-g09d2 From bf4e0ed3d027ce581be18496036862131b5f32aa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 1 Jun 2009 17:53:16 +1000 Subject: perf_counter: Remove unused prev_state field This removes the prev_state field of struct perf_counter since it is now unused. It was only used by the cpu migration counter, which doesn't use it any more. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.35052.915728.626374@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 - kernel/perf_counter.c | 4 ---- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index deb9acf9ad2..d970fbc16af 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -427,7 +427,6 @@ struct perf_counter { const struct pmu *pmu; enum perf_counter_active_state state; - enum perf_counter_active_state prev_state; atomic64_t count; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index cd94cf3bf9e..fbed4d28ad7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -572,7 +572,6 @@ group_sched_in(struct perf_counter *group_counter, if (ret) return ret < 0 ? ret : 0; - group_counter->prev_state = group_counter->state; if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) return -EAGAIN; @@ -580,7 +579,6 @@ group_sched_in(struct perf_counter *group_counter, * Schedule in siblings as one group (if any): */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - counter->prev_state = counter->state; if (counter_sched_in(counter, cpuctx, ctx, cpu)) { partial_group = counter; goto group_error; @@ -657,7 +655,6 @@ static void add_counter_to_ctx(struct perf_counter *counter, struct perf_counter_context *ctx) { list_add_counter(counter, ctx); - counter->prev_state = PERF_COUNTER_STATE_OFF; counter->tstamp_enabled = ctx->time; counter->tstamp_running = ctx->time; counter->tstamp_stopped = ctx->time; @@ -820,7 +817,6 @@ static void __perf_counter_enable(void *info) ctx->is_active = 1; update_context_time(ctx); - counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; -- cgit v1.2.3-70-g09d2 From 874ab9233eeddb85fd2dd85131c145bde75da39a Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 2 Jun 2009 13:58:56 +0200 Subject: netfilter: nf_ct_tcp: TCP simultaneous open support The patch below adds supporting TCP simultaneous open to conntrack. The unused LISTEN state is replaced by a new state (SYN_SENT2) denoting the second SYN sent from the reply direction in the new case. The state table is updated and the function tcp_in_window is modified to handle simultaneous open. The functionality can fairly easily be tested by socat. A sample tcpdump recording 23:21:34.244733 IP (tos 0x0, ttl 64, id 49224, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xe75f (correct), 3383710133:3383710133(0) win 5840 23:21:34.244783 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 40) 192.168.0.1.2020 > 192.168.0.254.2020: R, cksum 0x0253 (correct), 0:0(0) ack 3383710134 win 0 23:21:36.038680 IP (tos 0x0, ttl 64, id 28092, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.1.2020 > 192.168.0.254.2020: S, cksum 0x704b (correct), 2634546729:2634546729(0) win 5840 23:21:36.038777 IP (tos 0x0, ttl 64, id 49225, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xb179 (correct), 3383710133:3383710133(0) ack 2634546730 win 5840 23:21:36.038847 IP (tos 0x0, ttl 64, id 28093, offset 0, flags [DF], proto TCP (6), length 52) 192.168.0.1.2020 > 192.168.0.254.2020: ., cksum 0xebad (correct), ack 3383710134 win 2920 and the corresponding netlink events: [NEW] tcp 6 120 SYN_SENT src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 [UNREPLIED] src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 120 LISTEN src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 60 SYN_RECV src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 432000 ESTABLISHED src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [ASSURED] The RST packet was dropped in the raw table, thus it did not reach conntrack. nfnetlink_conntrack is unpatched so it shows the new SYN_SENT2 state as the old unused LISTEN. With TCP simultaneous open support we satisfy REQ-2 in RFC 5382 ;-) . Additional minor correction in this patch is that in order to catch uninitialized reply directions, "td_maxwin == 0" is used instead of "td_end == 0" because the former can't be true except in uninitialized state while td_end may accidentally be equal to zero in the mid of a connection. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 3 +- net/netfilter/nf_conntrack_proto_tcp.c | 98 +++++++++++++++++++----------- 2 files changed, 63 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 3066789b972..74c27ca770e 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -15,7 +15,8 @@ enum tcp_conntrack { TCP_CONNTRACK_LAST_ACK, TCP_CONNTRACK_TIME_WAIT, TCP_CONNTRACK_CLOSE, - TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_LISTEN, /* obsolete */ +#define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN TCP_CONNTRACK_MAX, TCP_CONNTRACK_IGNORE }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e..4c7f6f0dae9 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -59,7 +59,7 @@ static const char *const tcp_conntrack_names[] = { "LAST_ACK", "TIME_WAIT", "CLOSE", - "LISTEN" + "SYN_SENT2", }; #define SECS * HZ @@ -82,6 +82,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_LAST_ACK] = 30 SECS, [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE] = 10 SECS, + [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, }; #define sNO TCP_CONNTRACK_NONE @@ -93,7 +94,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { #define sLA TCP_CONNTRACK_LAST_ACK #define sTW TCP_CONNTRACK_TIME_WAIT #define sCL TCP_CONNTRACK_CLOSE -#define sLI TCP_CONNTRACK_LISTEN +#define sS2 TCP_CONNTRACK_SYN_SENT2 #define sIV TCP_CONNTRACK_MAX #define sIG TCP_CONNTRACK_IGNORE @@ -123,6 +124,7 @@ enum tcp_bit_set { * * NONE: initial state * SYN_SENT: SYN-only packet seen + * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open * SYN_RECV: SYN-ACK packet seen * ESTABLISHED: ACK packet seen * FIN_WAIT: FIN packet seen @@ -131,26 +133,24 @@ enum tcp_bit_set { * TIME_WAIT: last ACK seen * CLOSE: closed connection (RST) * - * LISTEN state is not used. - * * Packets marked as IGNORED (sIG): * if they may be either invalid or valid * and the receiver may send back a connection * closing RST or a SYN/ACK. * * Packets marked as INVALID (sIV): - * if they are invalid - * or we do not support the request (simultaneous open) + * if we regard them as truly invalid packets */ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, /* * sNO -> sSS Initialize a new connection * sSS -> sSS Retransmitted SYN - * sSR -> sIG Late retransmitted SYN? + * sS2 -> sS2 Late retransmitted SYN + * sSR -> sIG * sES -> sIG Error: SYNs in window outside the SYN_SENT state * are errors. Receiver will reply with RST * and close the connection. @@ -161,22 +161,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sSS Reopened connection (RFC 1122). * sCL -> sSS */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* - * A SYN/ACK from the client is always invalid: - * - either it tries to set up a simultaneous open, which is - * not supported; - * - or the firewall has just been inserted between the two hosts - * during the session set-up. The SYN will be retransmitted - * by the true client (or it'll time out). + * sNO -> sIV Too late and no reason to do anything + * sSS -> sIV Client can't send SYN and then SYN/ACK + * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open + * sSR -> sIG + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sNO -> sIV Too late and no reason to do anything... * sSS -> sIV Client migth not send FIN in this state: * we enforce waiting for a SYN/ACK reply first. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions, waiting for @@ -187,11 +195,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sNO -> sES Assumed. * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sS2 -> sIV * sSR -> sES Established state is reached. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -200,29 +209,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, /* * sNO -> sIV Never reached. - * sSS -> sIV Simultaneous open, not supported - * sSR -> sIV Simultaneous open, not supported. - * sES -> sIV Server may not initiate a connection. + * sSS -> sS2 Simultaneous open + * sS2 -> sS2 Retransmitted simultaneous SYN + * sSR -> sIV Invalid SYN packets sent by the server + * sES -> sIV * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sIV Reopened connection, but server may not do it. * sCL -> sIV */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. + * sS2 -> sSR Simultaneous open * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN @@ -231,10 +242,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sIG * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sSS -> sIV Server might not send FIN in this state. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions. @@ -243,10 +255,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, /* * sSS -> sIG Might be a half-open connection. + * sS2 -> sIG * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -255,8 +268,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; @@ -521,13 +534,14 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - if (sender->td_end == 0) { + if (sender->td_maxwin == 0) { /* * Initialize sender data. */ - if (tcph->syn && tcph->ack) { + if (tcph->syn) { /* - * Outgoing SYN-ACK in reply to a SYN. + * SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; @@ -543,6 +557,9 @@ static bool tcp_in_window(const struct nf_conn *ct, && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; + if (!tcph->ack) + /* Simultaneous open */ + return true; } else { /* * We are in the middle of a connection, @@ -1068,7 +1085,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[1].td_end = 0; ct->proto.tcp.seen[1].td_maxend = 0; - ct->proto.tcp.seen[1].td_maxwin = 1; + ct->proto.tcp.seen[1].td_maxwin = 0; ct->proto.tcp.seen[1].td_scale = 0; /* tcp_packet will set them */ @@ -1309,6 +1326,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "ip_conntrack_tcp_timeout_syn_sent2", + .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], -- cgit v1.2.3-70-g09d2 From 709e50cf870e61745b39552044aa6c7c38e4f9e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 14:13:15 +0200 Subject: perf_counter: Use PID namespaces properly Stop using task_struct::pid and start using PID namespaces. PIDs will be reported in the PID namespace of the monitoring task at the moment of counter creation. Signed-off-by: Peter Zijlstra Cc: Eric W. Biederman Cc: Oleg Nesterov Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ kernel/perf_counter.c | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d970fbc16af..9ec20fc6bd3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -317,6 +317,7 @@ enum perf_event_type { #include #include #include +#include #include struct task_struct; @@ -500,6 +501,8 @@ struct perf_counter { void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; + + struct pid_namespace *ns; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fbed4d28ad7..caa012cfe49 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1432,6 +1432,8 @@ static void free_counter_rcu(struct rcu_head *head) struct perf_counter *counter; counter = container_of(head, struct perf_counter, rcu_head); + if (counter->ns) + put_pid_ns(counter->ns); kfree(counter); } @@ -2267,6 +2269,28 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) +{ + /* + * only top level counters have the pid namespace they were created in + */ + if (counter->parent) + counter = counter->parent; + + return task_tgid_nr_ns(p, counter->ns); +} + +static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) +{ + /* + * only top level counters have the pid namespace they were created in + */ + if (counter->parent) + counter = counter->parent; + + return task_pid_nr_ns(p, counter->ns); +} + static void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { @@ -2303,8 +2327,8 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_TID) { /* namespace issues */ - tid_entry.pid = current->group_leader->pid; - tid_entry.tid = current->pid; + tid_entry.pid = perf_counter_pid(counter, current); + tid_entry.tid = perf_counter_tid(counter, current); header.type |= PERF_RECORD_TID; header.size += sizeof(tid_entry); @@ -2432,6 +2456,9 @@ static void perf_counter_comm_output(struct perf_counter *counter, if (ret) return; + comm_event->event.pid = perf_counter_pid(counter, comm_event->task); + comm_event->event.tid = perf_counter_tid(counter, comm_event->task); + perf_output_put(&handle, comm_event->event); perf_output_copy(&handle, comm_event->comm, comm_event->comm_size); @@ -2504,8 +2531,6 @@ void perf_counter_comm(struct task_struct *task) .task = task, .event = { .header = { .type = PERF_EVENT_COMM, }, - .pid = task->group_leader->pid, - .tid = task->pid, }, }; @@ -2542,6 +2567,9 @@ static void perf_counter_mmap_output(struct perf_counter *counter, if (ret) return; + mmap_event->event.pid = perf_counter_pid(counter, current); + mmap_event->event.tid = perf_counter_tid(counter, current); + perf_output_put(&handle, mmap_event->event); perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); @@ -2641,8 +2669,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, .file = file, .event = { .header = { .type = PERF_EVENT_MMAP, }, - .pid = current->group_leader->pid, - .tid = current->pid, .start = addr, .len = len, .pgoff = pgoff, @@ -2664,8 +2690,6 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, .file = file, .event = { .header = { .type = PERF_EVENT_MUNMAP, }, - .pid = current->group_leader->pid, - .tid = current->pid, .start = addr, .len = len, .pgoff = pgoff, @@ -3445,6 +3469,8 @@ SYSCALL_DEFINE5(perf_counter_open, list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); mutex_unlock(¤t->perf_counter_mutex); + counter->ns = get_pid_ns(current->nsproxy->pid_ns); + fput_light(counter_file, fput_needed2); out_fput: -- cgit v1.2.3-70-g09d2 From f2f3e38c63c58a3d39bd710039af8bbd15ecaff6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jun 2009 20:03:35 +0200 Subject: netfilter: ctnetlink: rename tuple() by nf_ct_tuple() macro definition This patch move the internal tuple() macro definition to the header file as nf_ct_tuple(). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_netlink.c | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6c3f964de9e..b909241b668 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -144,6 +144,8 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; } +#define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2d7d69ff215..3553f394125 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -346,8 +346,6 @@ nla_put_failure: return -1; } -#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) - static int ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, const struct nf_conn *ct) @@ -369,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -509,7 +507,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (!item->report && !nfnetlink_has_listeners(group)) return NOTIFY_DONE; - skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); + skb = ctnetlink_alloc_skb(nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL), + GFP_ATOMIC); if (!skb) goto errout; @@ -528,14 +527,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); -- cgit v1.2.3-70-g09d2 From 274d383b9c1906847a64bbb267b0183599ce86a0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jun 2009 20:08:38 +0200 Subject: netfilter: conntrack: don't report events on module removal During the module removal there are no possible event listeners since ctnetlink must be removed before to allow removing nf_conntrack. This patch removes the event reporting for the module removal case which is not of any use in the existing code. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 15 ++++++++++----- net/netfilter/nf_conntrack_netlink.c | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b909241b668..2ba36dd33ae 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -203,7 +203,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(struct net *net, u32 pid, int report); +extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8020db6274b..f59c4edf905 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1001,7 +1001,7 @@ struct __nf_ct_flush_report { int report; }; -static int kill_all(struct nf_conn *i, void *data) +static int kill_report(struct nf_conn *i, void *data) { struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; @@ -1013,6 +1013,11 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } +static int kill_all(struct nf_conn *i, void *data) +{ + return 1; +} + void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) { if (vmalloced) @@ -1023,15 +1028,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 pid, int report) { struct __nf_ct_flush_report fr = { .pid = pid, .report = report, }; - nf_ct_iterate_cleanup(net, kill_all, &fr); + nf_ct_iterate_cleanup(net, kill_report, &fr); } -EXPORT_SYMBOL_GPL(nf_conntrack_flush); +EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); static void nf_conntrack_cleanup_init_net(void) { @@ -1045,7 +1050,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_event_cache_flush(net); nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(net, 0, 0); + nf_ct_iterate_cleanup(net, kill_all, NULL); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 58fde0e47b0..3a20de1be63 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -777,9 +777,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(&init_net, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_flush_report(&init_net, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); return 0; } -- cgit v1.2.3-70-g09d2 From 6bfea1984aea86089907caf8974513c2402a3b3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jun 2009 20:08:44 +0200 Subject: netfilter: conntrack: remove events flags from userspace exposed file This patch moves the event flags from linux/netfilter/nf_conntrack_common.h to net/netfilter/nf_conntrack_ecache.h. This flags are not of any use from userspace. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 69 --------------------------- include/net/netfilter/nf_conntrack_ecache.h | 69 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 885cbe28226..a8248ee422b 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -75,75 +75,6 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; -/* Connection tracking event bits */ -enum ip_conntrack_events -{ - /* New conntrack */ - IPCT_NEW_BIT = 0, - IPCT_NEW = (1 << IPCT_NEW_BIT), - - /* Expected connection */ - IPCT_RELATED_BIT = 1, - IPCT_RELATED = (1 << IPCT_RELATED_BIT), - - /* Destroyed conntrack */ - IPCT_DESTROY_BIT = 2, - IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), - - /* Timer has been refreshed */ - IPCT_REFRESH_BIT = 3, - IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), - - /* Status has changed */ - IPCT_STATUS_BIT = 4, - IPCT_STATUS = (1 << IPCT_STATUS_BIT), - - /* Update of protocol info */ - IPCT_PROTOINFO_BIT = 5, - IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), - - /* Volatile protocol info */ - IPCT_PROTOINFO_VOLATILE_BIT = 6, - IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), - - /* New helper for conntrack */ - IPCT_HELPER_BIT = 7, - IPCT_HELPER = (1 << IPCT_HELPER_BIT), - - /* Update of helper info */ - IPCT_HELPINFO_BIT = 8, - IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), - - /* Volatile helper info */ - IPCT_HELPINFO_VOLATILE_BIT = 9, - IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), - - /* NAT info */ - IPCT_NATINFO_BIT = 10, - IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - - /* Counter highest bit has been set, unused */ - IPCT_COUNTER_FILLING_BIT = 11, - IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), - - /* Mark is set */ - IPCT_MARK_BIT = 12, - IPCT_MARK = (1 << IPCT_MARK_BIT), - - /* NAT sequence adjustment */ - IPCT_NATSEQADJ_BIT = 13, - IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), - - /* Secmark is set */ - IPCT_SECMARK_BIT = 14, - IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW_BIT = 0, - IPEXP_NEW = (1 << IPEXP_NEW_BIT), -}; - #ifdef __KERNEL__ struct ip_conntrack_stat { diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 0ff0dc69ca4..892b8cdf7f6 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -11,6 +11,75 @@ #include #include +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set, unused */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), + + /* Mark is set */ + IPCT_MARK_BIT = 12, + IPCT_MARK = (1 << IPCT_MARK_BIT), + + /* NAT sequence adjustment */ + IPCT_NATSEQADJ_BIT = 13, + IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), + + /* Secmark is set */ + IPCT_SECMARK_BIT = 14, + IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), +}; + #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache { struct nf_conn *ct; -- cgit v1.2.3-70-g09d2 From 17e6e4eac070607a35464ea7e2c5eceac32e5eca Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jun 2009 20:08:46 +0200 Subject: netfilter: conntrack: simplify event caching system This patch simplifies the conntrack event caching system by removing several events: * IPCT_[*]_VOLATILE, IPCT_HELPINFO and IPCT_NATINFO has been deleted since the have no clients. * IPCT_COUNTER_FILLING which is a leftover of the 32-bits counter days. * IPCT_REFRESH which is not of any use since we always include the timeout in the messages. After this patch, the existing events are: * IPCT_NEW, IPCT_RELATED and IPCT_DESTROY, that are used to identify addition and deletion of entries. * IPCT_STATUS, that notes that the status bits have changes, eg. IPS_SEEN_REPLY and IPS_ASSURED. * IPCT_PROTOINFO, that reports that internal protocol information has changed, eg. the TCP, DCCP and SCTP protocol state. * IPCT_HELPER, that a helper has been assigned or unassigned to this entry. * IPCT_MARK and IPCT_SECMARK, that reports that the mark has changed, this covers the case when a mark is set to zero. * IPCT_NATSEQADJ, to report that there's updates in the NAT sequence adjustment. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 36 +++++--------------------- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 1 - net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1 - net/netfilter/nf_conntrack_core.c | 14 +--------- net/netfilter/nf_conntrack_ftp.c | 2 -- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 1 - 7 files changed, 8 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 892b8cdf7f6..2e17a2d0eb3 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -26,52 +26,28 @@ enum ip_conntrack_events IPCT_DESTROY_BIT = 2, IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), - /* Timer has been refreshed */ - IPCT_REFRESH_BIT = 3, - IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), - /* Status has changed */ - IPCT_STATUS_BIT = 4, + IPCT_STATUS_BIT = 3, IPCT_STATUS = (1 << IPCT_STATUS_BIT), /* Update of protocol info */ - IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO_BIT = 4, IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), - /* Volatile protocol info */ - IPCT_PROTOINFO_VOLATILE_BIT = 6, - IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), - /* New helper for conntrack */ - IPCT_HELPER_BIT = 7, + IPCT_HELPER_BIT = 5, IPCT_HELPER = (1 << IPCT_HELPER_BIT), - /* Update of helper info */ - IPCT_HELPINFO_BIT = 8, - IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), - - /* Volatile helper info */ - IPCT_HELPINFO_VOLATILE_BIT = 9, - IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), - - /* NAT info */ - IPCT_NATINFO_BIT = 10, - IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - - /* Counter highest bit has been set, unused */ - IPCT_COUNTER_FILLING_BIT = 11, - IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), - /* Mark is set */ - IPCT_MARK_BIT = 12, + IPCT_MARK_BIT = 6, IPCT_MARK = (1 << IPCT_MARK_BIT), /* NAT sequence adjustment */ - IPCT_NATSEQADJ_BIT = 13, + IPCT_NATSEQADJ_BIT = 7, IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), /* Secmark is set */ - IPCT_SECMARK_BIT = 14, + IPCT_SECMARK_BIT = 8, IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), }; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 23b2c2ee869..c6ab3d99e79 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -91,7 +91,6 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9903227bf37..a0acd9655fe 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -104,7 +104,6 @@ static int icmpv6_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f59c4edf905..b54c23475e9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); -#ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, ct); -#endif + nf_conntrack_event_cache(master_ct(ct) ? IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; @@ -807,8 +803,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - int event = 0; - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); @@ -821,7 +815,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, /* If not in hash table, timer will not be active yet */ if (!nf_ct_is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - event = IPCT_REFRESH; } else { unsigned long newtime = jiffies + extra_jiffies; @@ -832,7 +825,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, && del_timer(&ct->timeout)) { ct->timeout.expires = newtime; add_timer(&ct->timeout); - event = IPCT_REFRESH; } } @@ -849,10 +841,6 @@ acct: } spin_unlock_bh(&nf_conntrack_lock); - - /* must be unlocked when calling event cache */ - if (event) - nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 00fecc385f9..5509dd1f14c 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3a20de1be63..b1b9e4fb7de 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -477,7 +477,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { + } else if (events) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4c7f6f0dae9..b7e8a825efe 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -991,7 +991,6 @@ static int tcp_packet(struct nf_conn *ct, timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); -- cgit v1.2.3-70-g09d2 From 53e111a730ea8b002d57dd226098c12789993329 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 17:01:58 +0200 Subject: x86: Fix atomic_long_xchg() on 64bit Apparently I'm the first to use it :-) Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/asm-generic/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 3673a13b670..81d3be459ef 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(l), (new))) + (atomic64_xchg((atomic64_t *)(v), (new))) #else /* BITS_PER_LONG == 64 */ -- cgit v1.2.3-70-g09d2 From 8e5799b1ad2a0567fdfaaf0e91b40efee010f2c1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:08:15 +0200 Subject: perf_counter: Add unique counter id Stephan raised the issue that we currently cannot distinguish between similar counters within a group (PERF_RECORD_GROUP uses the config value as identifier). Therefore, generate a new ID for each counter using a global u64 sequence counter. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 8 +++++--- kernel/perf_counter.c | 9 +++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9ec20fc6bd3..4845a214b9e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -114,8 +114,9 @@ enum perf_counter_record_format { * in increasing order of bit value, after the counter value. */ enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1, - PERF_FORMAT_TOTAL_TIME_RUNNING = 2, + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, }; /* @@ -290,7 +291,7 @@ enum perf_event_type { * { u32 cpu, res; } && PERF_RECORD_CPU * * { u64 nr; - * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP * * { u16 nr, * hv, @@ -503,6 +504,7 @@ struct perf_counter { struct rcu_head rcu_head; struct pid_namespace *ns; + u64 id; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index caa012cfe49..978ecfcc7aa 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1510,6 +1510,8 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = counter->total_time_running + atomic64_read(&counter->child_total_time_running); + if (counter->hw_event.read_format & PERF_FORMAT_ID) + values[n++] = counter->id; mutex_unlock(&counter->child_mutex); if (count < n * sizeof(u64)) @@ -2303,7 +2305,7 @@ static void perf_counter_output(struct perf_counter *counter, u32 pid, tid; } tid_entry; struct { - u64 event; + u64 id; u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; @@ -2416,7 +2418,7 @@ static void perf_counter_output(struct perf_counter *counter, if (sub != counter) sub->pmu->read(sub); - group_entry.event = sub->hw_event.config; + group_entry.id = sub->id; group_entry.counter = atomic64_read(&sub->count); perf_output_put(&handle, group_entry); @@ -3375,6 +3377,8 @@ done: return counter; } +static atomic64_t perf_counter_id; + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -3470,6 +3474,7 @@ SYSCALL_DEFINE5(perf_counter_open, mutex_unlock(¤t->perf_counter_mutex); counter->ns = get_pid_ns(current->nsproxy->pid_ns); + counter->id = atomic64_inc_return(&perf_counter_id); fput_light(counter_file, fput_needed2); -- cgit v1.2.3-70-g09d2 From b23f3325ed465f1bd914384884269af0d106778c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:13:03 +0200 Subject: perf_counter: Rename various fields A few renames: s/irq_period/sample_period/ s/irq_freq/sample_freq/ s/PERF_RECORD_/PERF_SAMPLE_/ s/record_type/sample_type/ And change both the new sample_type and read_format to u64. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 12 ++--- arch/x86/kernel/cpu/perf_counter.c | 8 +-- include/linux/perf_counter.h | 32 ++++++------ kernel/perf_counter.c | 104 ++++++++++++++++++------------------- 4 files changed, 78 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index f96d55f55bd..c9633321e7a 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -535,7 +535,7 @@ void hw_perf_enable(void) continue; } val = 0; - if (counter->hw.irq_period) { + if (counter->hw.sample_period) { left = atomic64_read(&counter->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; @@ -749,12 +749,12 @@ static void power_pmu_unthrottle(struct perf_counter *counter) s64 val, left; unsigned long flags; - if (!counter->hw.idx || !counter->hw.irq_period) + if (!counter->hw.idx || !counter->hw.sample_period) return; local_irq_save(flags); perf_disable(); power_pmu_read(counter); - left = counter->hw.irq_period; + left = counter->hw.sample_period; val = 0; if (left < 0x80000000L) val = 0x80000000L - left; @@ -789,7 +789,7 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, if (counter->hw_event.exclude_user || counter->hw_event.exclude_kernel || counter->hw_event.exclude_hv - || counter->hw_event.irq_period) + || counter->hw_event.sample_period) return 0; if (ppmu->limited_pmc_event(ev)) @@ -925,7 +925,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw.irq_period); + atomic64_set(&counter->hw.period_left, counter->hw.sample_period); /* * See if we need to reserve the PMU. @@ -958,7 +958,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) static void record_and_restart(struct perf_counter *counter, long val, struct pt_regs *regs, int nmi) { - u64 period = counter->hw.irq_period; + u64 period = counter->hw.sample_period; s64 prev, delta, left; int record = 0; u64 addr, mmcra, sdsync; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 316b0c995f3..ec06aa5e928 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -290,11 +290,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 1; hw_event->nmi = 1; - if (!hwc->irq_period) - hwc->irq_period = x86_pmu.max_period; + if (!hwc->sample_period) + hwc->sample_period = x86_pmu.max_period; atomic64_set(&hwc->period_left, - min(x86_pmu.max_period, hwc->irq_period)); + min(x86_pmu.max_period, hwc->sample_period)); /* * Raw event type provide the config in the event structure @@ -462,7 +462,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); - s64 period = min(x86_pmu.max_period, hwc->irq_period); + s64 period = min(x86_pmu.max_period, hwc->sample_period); int err; /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4845a214b9e..1fcd3cc9385 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -94,18 +94,18 @@ enum sw_event_ids { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* - * Bits that can be set in hw_event.record_type to request information + * Bits that can be set in hw_event.sample_type to request information * in the overflow packets. */ -enum perf_counter_record_format { - PERF_RECORD_IP = 1U << 0, - PERF_RECORD_TID = 1U << 1, - PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_ADDR = 1U << 3, - PERF_RECORD_GROUP = 1U << 4, - PERF_RECORD_CALLCHAIN = 1U << 5, - PERF_RECORD_CONFIG = 1U << 6, - PERF_RECORD_CPU = 1U << 7, +enum perf_counter_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_CONFIG = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, }; /* @@ -132,12 +132,12 @@ struct perf_counter_hw_event { __u64 config; union { - __u64 irq_period; - __u64 irq_freq; + __u64 sample_period; + __u64 sample_freq; }; - __u32 record_type; - __u32 read_format; + __u64 sample_type; + __u64 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -262,7 +262,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; - * u64 irq_period; + * u64 sample_period; * }; */ PERF_EVENT_PERIOD = 4, @@ -363,7 +363,7 @@ struct hw_perf_counter { }; }; atomic64_t prev_count; - u64 irq_period; + u64 sample_period; atomic64_t period_left; u64 interrupts; #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 978ecfcc7aa..5ecd9981c03 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1186,7 +1186,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period); static void perf_adjust_freq(struct perf_counter_context *ctx) { struct perf_counter *counter; - u64 interrupts, irq_period; + u64 interrupts, sample_period; u64 events, period; s64 delta; @@ -1204,23 +1204,23 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) interrupts = 2*sysctl_perf_counter_limit/HZ; } - if (!counter->hw_event.freq || !counter->hw_event.irq_freq) + if (!counter->hw_event.freq || !counter->hw_event.sample_freq) continue; - events = HZ * interrupts * counter->hw.irq_period; - period = div64_u64(events, counter->hw_event.irq_freq); + events = HZ * interrupts * counter->hw.sample_period; + period = div64_u64(events, counter->hw_event.sample_freq); - delta = (s64)(1 + period - counter->hw.irq_period); + delta = (s64)(1 + period - counter->hw.sample_period); delta >>= 1; - irq_period = counter->hw.irq_period + delta; + sample_period = counter->hw.sample_period + delta; - if (!irq_period) - irq_period = 1; + if (!sample_period) + sample_period = 1; - perf_log_period(counter, irq_period); + perf_log_period(counter, sample_period); - counter->hw.irq_period = irq_period; + counter->hw.sample_period = sample_period; } spin_unlock(&ctx->lock); } @@ -2297,7 +2297,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { int ret; - u64 record_type = counter->hw_event.record_type; + u64 sample_type = counter->hw_event.sample_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; @@ -2321,61 +2321,61 @@ static void perf_counter_output(struct perf_counter *counter, header.misc = PERF_EVENT_MISC_OVERFLOW; header.misc |= perf_misc_flags(regs); - if (record_type & PERF_RECORD_IP) { + if (sample_type & PERF_SAMPLE_IP) { ip = perf_instruction_pointer(regs); - header.type |= PERF_RECORD_IP; + header.type |= PERF_SAMPLE_IP; header.size += sizeof(ip); } - if (record_type & PERF_RECORD_TID) { + if (sample_type & PERF_SAMPLE_TID) { /* namespace issues */ tid_entry.pid = perf_counter_pid(counter, current); tid_entry.tid = perf_counter_tid(counter, current); - header.type |= PERF_RECORD_TID; + header.type |= PERF_SAMPLE_TID; header.size += sizeof(tid_entry); } - if (record_type & PERF_RECORD_TIME) { + if (sample_type & PERF_SAMPLE_TIME) { /* * Maybe do better on x86 and provide cpu_clock_nmi() */ time = sched_clock(); - header.type |= PERF_RECORD_TIME; + header.type |= PERF_SAMPLE_TIME; header.size += sizeof(u64); } - if (record_type & PERF_RECORD_ADDR) { - header.type |= PERF_RECORD_ADDR; + if (sample_type & PERF_SAMPLE_ADDR) { + header.type |= PERF_SAMPLE_ADDR; header.size += sizeof(u64); } - if (record_type & PERF_RECORD_CONFIG) { - header.type |= PERF_RECORD_CONFIG; + if (sample_type & PERF_SAMPLE_CONFIG) { + header.type |= PERF_SAMPLE_CONFIG; header.size += sizeof(u64); } - if (record_type & PERF_RECORD_CPU) { - header.type |= PERF_RECORD_CPU; + if (sample_type & PERF_SAMPLE_CPU) { + header.type |= PERF_SAMPLE_CPU; header.size += sizeof(cpu_entry); cpu_entry.cpu = raw_smp_processor_id(); } - if (record_type & PERF_RECORD_GROUP) { - header.type |= PERF_RECORD_GROUP; + if (sample_type & PERF_SAMPLE_GROUP) { + header.type |= PERF_SAMPLE_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } - if (record_type & PERF_RECORD_CALLCHAIN) { + if (sample_type & PERF_SAMPLE_CALLCHAIN) { callchain = perf_callchain(regs); if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); - header.type |= PERF_RECORD_CALLCHAIN; + header.type |= PERF_SAMPLE_CALLCHAIN; header.size += callchain_size; } } @@ -2386,28 +2386,28 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_put(&handle, header); - if (record_type & PERF_RECORD_IP) + if (sample_type & PERF_SAMPLE_IP) perf_output_put(&handle, ip); - if (record_type & PERF_RECORD_TID) + if (sample_type & PERF_SAMPLE_TID) perf_output_put(&handle, tid_entry); - if (record_type & PERF_RECORD_TIME) + if (sample_type & PERF_SAMPLE_TIME) perf_output_put(&handle, time); - if (record_type & PERF_RECORD_ADDR) + if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(&handle, addr); - if (record_type & PERF_RECORD_CONFIG) + if (sample_type & PERF_SAMPLE_CONFIG) perf_output_put(&handle, counter->hw_event.config); - if (record_type & PERF_RECORD_CPU) + if (sample_type & PERF_SAMPLE_CPU) perf_output_put(&handle, cpu_entry); /* - * XXX PERF_RECORD_GROUP vs inherited counters seems difficult. + * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. */ - if (record_type & PERF_RECORD_GROUP) { + if (sample_type & PERF_SAMPLE_GROUP) { struct perf_counter *leader, *sub; u64 nr = counter->nr_siblings; @@ -2702,7 +2702,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, } /* - * Log irq_period changes so that analyzing tools can re-normalize the + * Log sample_period changes so that analyzing tools can re-normalize the * event flow. */ @@ -2725,7 +2725,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period) .period = period, }; - if (counter->hw.irq_period == period) + if (counter->hw.sample_period == period) return; ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0); @@ -2834,7 +2834,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->irq_period; + s64 period = hwc->sample_period; if (unlikely(left <= -period)) { left = period; @@ -2874,7 +2874,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) ret = HRTIMER_NORESTART; } - period = max_t(u64, 10000, counter->hw.irq_period); + period = max_t(u64, 10000, counter->hw.sample_period); hrtimer_forward_now(hrtimer, ns_to_ktime(period)); return ret; @@ -2959,7 +2959,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, { int neg = atomic64_add_negative(nr, &counter->hw.count); - if (counter->hw.irq_period && !neg && regs) + if (counter->hw.sample_period && !neg && regs) perf_swcounter_overflow(counter, nmi, regs, addr); } @@ -3080,8 +3080,8 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) atomic64_set(&hwc->prev_count, cpu_clock(cpu)); hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->irq_period) { - u64 period = max_t(u64, 10000, hwc->irq_period); + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, HRTIMER_MODE_REL, 0); @@ -3092,7 +3092,7 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) static void cpu_clock_perf_counter_disable(struct perf_counter *counter) { - if (counter->hw.irq_period) + if (counter->hw.sample_period) hrtimer_cancel(&counter->hw.hrtimer); cpu_clock_perf_counter_update(counter); } @@ -3132,8 +3132,8 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) atomic64_set(&hwc->prev_count, now); hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->irq_period) { - u64 period = max_t(u64, 10000, hwc->irq_period); + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, HRTIMER_MODE_REL, 0); @@ -3144,7 +3144,7 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) static void task_clock_perf_counter_disable(struct perf_counter *counter) { - if (counter->hw.irq_period) + if (counter->hw.sample_period) hrtimer_cancel(&counter->hw.hrtimer); task_clock_perf_counter_update(counter, counter->ctx->time); @@ -3223,7 +3223,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) return NULL; counter->destroy = tp_perf_counter_destroy; - counter->hw.irq_period = counter->hw_event.irq_period; + counter->hw.sample_period = counter->hw_event.sample_period; return &perf_ops_generic; } @@ -3323,15 +3323,15 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, pmu = NULL; hwc = &counter->hw; - if (hw_event->freq && hw_event->irq_freq) - hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq); + if (hw_event->freq && hw_event->sample_freq) + hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq); else - hwc->irq_period = hw_event->irq_period; + hwc->sample_period = hw_event->sample_period; /* - * we currently do not support PERF_RECORD_GROUP on inherited counters + * we currently do not support PERF_SAMPLE_GROUP on inherited counters */ - if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP)) + if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP)) goto done; if (perf_event_raw(hw_event)) { -- cgit v1.2.3-70-g09d2 From 8a016db386195b193e2a8aeddff9fe937dcb7a40 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:27:45 +0200 Subject: perf_counter: Remove the last nmi/irq bits IRQ (non-NMI) sampling is not used anymore - remove the last few bits. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 6 ------ include/linux/perf_counter.h | 4 +--- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ec06aa5e928..9e144fbebd2 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -284,12 +284,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (!hw_event->exclude_kernel) hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - /* - * Use NMI events all the time: - */ - hwc->nmi = 1; - hw_event->nmi = 1; - if (!hwc->sample_period) hwc->sample_period = x86_pmu.max_period; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1fcd3cc9385..cef9931793f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -140,7 +140,6 @@ struct perf_counter_hw_event { __u64 read_format; __u64 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ inherit : 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -153,7 +152,7 @@ struct perf_counter_hw_event { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 51; + __reserved_1 : 52; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -354,7 +353,6 @@ struct hw_perf_counter { u64 config; unsigned long config_base; unsigned long counter_base; - int nmi; int idx; }; union { /* software */ -- cgit v1.2.3-70-g09d2 From 8e3747c13c39246c7e46def7cf495d9d21d4c5f9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 16:16:02 +0200 Subject: perf_counter: Change data head from u32 to u64 Since some people worried that 4G might not be a large enough as an mmap data window, extend it to 64 bit for capable platforms. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 7 ++++--- kernel/perf_counter.c | 15 ++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index cef9931793f..c046f7d97cf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -212,7 +212,7 @@ struct perf_counter_mmap_page { * User-space reading this value should issue an rmb(), on SMP capable * platforms, after reading this value -- see perf_counter_wakeup(). */ - __u32 data_head; /* head in the data section */ + __u64 data_head; /* head in the data section */ }; #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) @@ -397,10 +397,11 @@ struct perf_mmap_data { int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ - atomic_t head; /* write position */ atomic_t events; /* event limit */ - atomic_t done_head; /* completed head */ + atomic_long_t head; /* write position */ + atomic_long_t done_head; /* completed head */ + atomic_t lock; /* concurrent writes */ atomic_t wakeup; /* needs a wakeup */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 5ecd9981c03..3f11a2bc6c7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2067,8 +2067,8 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) struct perf_output_handle { struct perf_counter *counter; struct perf_mmap_data *data; - unsigned int offset; - unsigned int head; + unsigned long head; + unsigned long offset; int nmi; int overflow; int locked; @@ -2122,7 +2122,8 @@ static void perf_output_lock(struct perf_output_handle *handle) static void perf_output_unlock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; - int head, cpu; + unsigned long head; + int cpu; data->done_head = data->head; @@ -2135,7 +2136,7 @@ again: * before we publish the new head, matched by a rmb() in userspace when * reading this position. */ - while ((head = atomic_xchg(&data->done_head, 0))) + while ((head = atomic_long_xchg(&data->done_head, 0))) data->user_page->data_head = head; /* @@ -2148,7 +2149,7 @@ again: /* * Therefore we have to validate we did not indeed do so. */ - if (unlikely(atomic_read(&data->done_head))) { + if (unlikely(atomic_long_read(&data->done_head))) { /* * Since we had it locked, we can lock it again. */ @@ -2195,7 +2196,7 @@ static int perf_output_begin(struct perf_output_handle *handle, do { offset = head = atomic_read(&data->head); head += size; - } while (atomic_cmpxchg(&data->head, offset, head) != offset); + } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); handle->offset = offset; handle->head = head; @@ -2246,7 +2247,7 @@ static void perf_output_copy(struct perf_output_handle *handle, * Check we didn't copy past our reservation window, taking the * possible unsigned int wrap into account. */ - WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0); + WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); } #define perf_output_put(handle, x) \ -- cgit v1.2.3-70-g09d2 From 08247e31ca79b8f02cce47b7e8120797a8726606 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 16:46:57 +0200 Subject: perf_counter: Add ioctl for changing the sample period/frequency Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 9 +++++---- kernel/perf_counter.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c046f7d97cf..45bdd3b95d3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -164,10 +164,11 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IOW('$', 0, u32) -#define PERF_COUNTER_IOC_DISABLE _IOW('$', 1, u32) -#define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) -#define PERF_COUNTER_IOC_RESET _IOW('$', 3, u32) +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) +#define PERF_COUNTER_IOC_RESET _IO ('$', 3) +#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3f11a2bc6c7..abe2f3b6c42 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1604,6 +1604,43 @@ static void perf_counter_for_each(struct perf_counter *counter, mutex_unlock(&counter->child_mutex); } +static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) +{ + struct perf_counter_context *ctx = counter->ctx; + unsigned long size; + int ret = 0; + u64 value; + + if (!counter->hw_event.sample_period) + return -EINVAL; + + size = copy_from_user(&value, arg, sizeof(value)); + if (size != sizeof(value)) + return -EFAULT; + + if (!value) + return -EINVAL; + + spin_lock_irq(&ctx->lock); + if (counter->hw_event.freq) { + if (value > sysctl_perf_counter_limit) { + ret = -EINVAL; + goto unlock; + } + + counter->hw_event.sample_freq = value; + } else { + counter->hw_event.sample_period = value; + counter->hw.sample_period = value; + + perf_log_period(counter, value); + } +unlock: + spin_unlock_irq(&ctx->lock); + + return ret; +} + static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -1623,6 +1660,10 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_REFRESH: return perf_counter_refresh(counter, arg); + + case PERF_COUNTER_IOC_PERIOD: + return perf_counter_period(counter, (u64 __user *)arg); + default: return -ENOTTY; } -- cgit v1.2.3-70-g09d2 From 0d48696f87e3618b0d35bd3e4e9d7c188d51e7de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 19:22:16 +0200 Subject: perf_counter: Rename perf_counter_hw_event => perf_counter_attr The structure isn't hw only and when I read event, I think about those things that fall out the other end. Rename the thing. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 38 ++++++------ arch/x86/kernel/cpu/perf_counter.c | 16 ++--- include/linux/perf_counter.h | 34 +++++------ include/linux/syscalls.h | 4 +- kernel/perf_counter.c | 116 ++++++++++++++++++------------------- 5 files changed, 104 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c9633321e7a..ea54686cb78 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -262,13 +262,13 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], } counter = ctrs[i]; if (first) { - eu = counter->hw_event.exclude_user; - ek = counter->hw_event.exclude_kernel; - eh = counter->hw_event.exclude_hv; + eu = counter->attr.exclude_user; + ek = counter->attr.exclude_kernel; + eh = counter->attr.exclude_hv; first = 0; - } else if (counter->hw_event.exclude_user != eu || - counter->hw_event.exclude_kernel != ek || - counter->hw_event.exclude_hv != eh) { + } else if (counter->attr.exclude_user != eu || + counter->attr.exclude_kernel != ek || + counter->attr.exclude_hv != eh) { return -EAGAIN; } } @@ -483,16 +483,16 @@ void hw_perf_enable(void) /* * Add in MMCR0 freeze bits corresponding to the - * hw_event.exclude_* bits for the first counter. + * attr.exclude_* bits for the first counter. * We have already checked that all counters have the * same values for these bits as the first counter. */ counter = cpuhw->counter[0]; - if (counter->hw_event.exclude_user) + if (counter->attr.exclude_user) cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->hw_event.exclude_kernel) + if (counter->attr.exclude_kernel) cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->hw_event.exclude_hv) + if (counter->attr.exclude_hv) cpuhw->mmcr[0] |= MMCR0_FCHV; /* @@ -786,10 +786,10 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, int n; u64 alt[MAX_EVENT_ALTERNATIVES]; - if (counter->hw_event.exclude_user - || counter->hw_event.exclude_kernel - || counter->hw_event.exclude_hv - || counter->hw_event.sample_period) + if (counter->attr.exclude_user + || counter->attr.exclude_kernel + || counter->attr.exclude_hv + || counter->attr.sample_period) return 0; if (ppmu->limited_pmc_event(ev)) @@ -855,13 +855,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if (!perf_event_raw(&counter->hw_event)) { - ev = perf_event_id(&counter->hw_event); + if (!perf_event_raw(&counter->attr)) { + ev = perf_event_id(&counter->attr); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { - ev = perf_event_config(&counter->hw_event); + ev = perf_event_config(&counter->attr); } counter->hw.config_base = ev; counter->hw.idx = 0; @@ -872,7 +872,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) * the user set it to. */ if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->hw_event.exclude_hv = 0; + counter->attr.exclude_hv = 0; /* * If this is a per-task counter, then we can use @@ -990,7 +990,7 @@ static void record_and_restart(struct perf_counter *counter, long val, */ if (record) { addr = 0; - if (counter->hw_event.record_type & PERF_RECORD_ADDR) { + if (counter->attr.record_type & PERF_RECORD_ADDR) { /* * The user wants a data address recorded. * If we're not doing instruction sampling, diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 904571bea71..e16e8c13132 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -247,11 +247,11 @@ static inline int x86_pmu_initialized(void) } /* - * Setup the hardware configuration for a given hw_event_type + * Setup the hardware configuration for a given attr_type */ static int __hw_perf_counter_init(struct perf_counter *counter) { - struct perf_counter_hw_event *hw_event = &counter->hw_event; + struct perf_counter_attr *attr = &counter->attr; struct hw_perf_counter *hwc = &counter->hw; int err; @@ -279,9 +279,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Count user and OS events unless requested not to. */ - if (!hw_event->exclude_user) + if (!attr->exclude_user) hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!hw_event->exclude_kernel) + if (!attr->exclude_kernel) hwc->config |= ARCH_PERFMON_EVENTSEL_OS; if (!hwc->sample_period) @@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (perf_event_raw(hw_event)) { - hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event)); + if (perf_event_raw(attr)) { + hwc->config |= x86_pmu.raw_event(perf_event_config(attr)); } else { - if (perf_event_id(hw_event) >= x86_pmu.max_events) + if (perf_event_id(attr) >= x86_pmu.max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= x86_pmu.event_map(perf_event_id(hw_event)); + hwc->config |= x86_pmu.event_map(perf_event_id(attr)); } counter->destroy = hw_perf_counter_destroy; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 45bdd3b95d3..37d5541d74c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -22,7 +22,7 @@ */ /* - * hw_event.type + * attr.type */ enum perf_event_types { PERF_TYPE_HARDWARE = 0, @@ -37,10 +37,10 @@ enum perf_event_types { }; /* - * Generalized performance counter event types, used by the hw_event.event_id + * Generalized performance counter event types, used by the attr.event_id * parameter of the sys_perf_counter_open() syscall: */ -enum hw_event_ids { +enum attr_ids { /* * Common hardware events, generalized by the kernel: */ @@ -94,7 +94,7 @@ enum sw_event_ids { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* - * Bits that can be set in hw_event.sample_type to request information + * Bits that can be set in attr.sample_type to request information * in the overflow packets. */ enum perf_counter_sample_format { @@ -109,7 +109,7 @@ enum perf_counter_sample_format { }; /* - * Bits that can be set in hw_event.read_format to request that + * Bits that can be set in attr.read_format to request that * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. */ @@ -122,7 +122,7 @@ enum perf_counter_read_format { /* * Hardware event to monitor via a performance monitoring counter: */ -struct perf_counter_hw_event { +struct perf_counter_attr { /* * The MSB of the config word signifies if the rest contains cpu * specific (raw) counter configuration data, if unset, the next @@ -323,25 +323,25 @@ enum perf_event_type { struct task_struct; -static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_raw(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_RAW_MASK; + return attr->config & PERF_COUNTER_RAW_MASK; } -static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_config(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_CONFIG_MASK; + return attr->config & PERF_COUNTER_CONFIG_MASK; } -static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_type(struct perf_counter_attr *attr) { - return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> + return (attr->config & PERF_COUNTER_TYPE_MASK) >> PERF_COUNTER_TYPE_SHIFT; } -static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) +static inline u64 perf_event_id(struct perf_counter_attr *attr) { - return hw_event->config & PERF_COUNTER_EVENT_MASK; + return attr->config & PERF_COUNTER_EVENT_MASK; } /** @@ -457,7 +457,7 @@ struct perf_counter { u64 tstamp_running; u64 tstamp_stopped; - struct perf_counter_hw_event hw_event; + struct perf_counter_attr attr; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -605,8 +605,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !perf_event_raw(&counter->hw_event) && - perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; + return !perf_event_raw(&counter->attr) && + perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE; } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 79faae950e2..c6c84ad8bd7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,7 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_hw_event; +struct perf_counter_attr; #include #include @@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( - const struct perf_counter_hw_event __user *hw_event_uptr, + const struct perf_counter_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index abe2f3b6c42..317cef78a38 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -260,7 +260,7 @@ counter_sched_out(struct perf_counter *counter, if (!is_software_counter(counter)) cpuctx->active_oncpu--; ctx->nr_active--; - if (counter->hw_event.exclusive || !cpuctx->active_oncpu) + if (counter->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; } @@ -282,7 +282,7 @@ group_sched_out(struct perf_counter *group_counter, list_for_each_entry(counter, &group_counter->sibling_list, list_entry) counter_sched_out(counter, cpuctx, ctx); - if (group_counter->hw_event.exclusive) + if (group_counter->attr.exclusive) cpuctx->exclusive = 0; } @@ -550,7 +550,7 @@ counter_sched_in(struct perf_counter *counter, cpuctx->active_oncpu++; ctx->nr_active++; - if (counter->hw_event.exclusive) + if (counter->attr.exclusive) cpuctx->exclusive = 1; return 0; @@ -642,7 +642,7 @@ static int group_can_go_on(struct perf_counter *counter, * If this group is exclusive and there are already * counters on the CPU, it can't go on. */ - if (counter->hw_event.exclusive && cpuctx->active_oncpu) + if (counter->attr.exclusive && cpuctx->active_oncpu) return 0; /* * Otherwise, try to add it if all previous groups were able @@ -725,7 +725,7 @@ static void __perf_install_in_context(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) { + if (leader->attr.pinned) { update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; } @@ -849,7 +849,7 @@ static void __perf_counter_enable(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) { + if (leader->attr.pinned) { update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; } @@ -927,7 +927,7 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh) /* * not supported on inherited counters */ - if (counter->hw_event.inherit) + if (counter->attr.inherit) return -EINVAL; atomic_add(refresh, &counter->event_limit); @@ -1094,7 +1094,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, */ list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (counter->state <= PERF_COUNTER_STATE_OFF || - !counter->hw_event.pinned) + !counter->attr.pinned) continue; if (counter->cpu != -1 && counter->cpu != cpu) continue; @@ -1122,7 +1122,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, * ignore pinned counters since we did them already. */ if (counter->state <= PERF_COUNTER_STATE_OFF || - counter->hw_event.pinned) + counter->attr.pinned) continue; /* @@ -1204,11 +1204,11 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) interrupts = 2*sysctl_perf_counter_limit/HZ; } - if (!counter->hw_event.freq || !counter->hw_event.sample_freq) + if (!counter->attr.freq || !counter->attr.sample_freq) continue; events = HZ * interrupts * counter->hw.sample_period; - period = div64_u64(events, counter->hw_event.sample_freq); + period = div64_u64(events, counter->attr.sample_freq); delta = (s64)(1 + period - counter->hw.sample_period); delta >>= 1; @@ -1444,11 +1444,11 @@ static void free_counter(struct perf_counter *counter) perf_pending_sync(counter); atomic_dec(&nr_counters); - if (counter->hw_event.mmap) + if (counter->attr.mmap) atomic_dec(&nr_mmap_tracking); - if (counter->hw_event.munmap) + if (counter->attr.munmap) atomic_dec(&nr_munmap_tracking); - if (counter->hw_event.comm) + if (counter->attr.comm) atomic_dec(&nr_comm_tracking); if (counter->destroy) @@ -1504,13 +1504,13 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) mutex_lock(&counter->child_mutex); values[0] = perf_counter_read(counter); n = 1; - if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) values[n++] = counter->total_time_enabled + atomic64_read(&counter->child_total_time_enabled); - if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = counter->total_time_running + atomic64_read(&counter->child_total_time_running); - if (counter->hw_event.read_format & PERF_FORMAT_ID) + if (counter->attr.read_format & PERF_FORMAT_ID) values[n++] = counter->id; mutex_unlock(&counter->child_mutex); @@ -1611,7 +1611,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) int ret = 0; u64 value; - if (!counter->hw_event.sample_period) + if (!counter->attr.sample_period) return -EINVAL; size = copy_from_user(&value, arg, sizeof(value)); @@ -1622,15 +1622,15 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) return -EINVAL; spin_lock_irq(&ctx->lock); - if (counter->hw_event.freq) { + if (counter->attr.freq) { if (value > sysctl_perf_counter_limit) { ret = -EINVAL; goto unlock; } - counter->hw_event.sample_freq = value; + counter->attr.sample_freq = value; } else { - counter->hw_event.sample_period = value; + counter->attr.sample_period = value; counter->hw.sample_period = value; perf_log_period(counter, value); @@ -2299,7 +2299,7 @@ static void perf_output_end(struct perf_output_handle *handle) struct perf_counter *counter = handle->counter; struct perf_mmap_data *data = handle->data; - int wakeup_events = counter->hw_event.wakeup_events; + int wakeup_events = counter->attr.wakeup_events; if (handle->overflow && wakeup_events) { int events = atomic_inc_return(&data->events); @@ -2339,7 +2339,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { int ret; - u64 sample_type = counter->hw_event.sample_type; + u64 sample_type = counter->attr.sample_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; @@ -2441,7 +2441,7 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_put(&handle, addr); if (sample_type & PERF_SAMPLE_CONFIG) - perf_output_put(&handle, counter->hw_event.config); + perf_output_put(&handle, counter->attr.config); if (sample_type & PERF_SAMPLE_CPU) perf_output_put(&handle, cpu_entry); @@ -2512,7 +2512,7 @@ static void perf_counter_comm_output(struct perf_counter *counter, static int perf_counter_comm_match(struct perf_counter *counter, struct perf_comm_event *comm_event) { - if (counter->hw_event.comm && + if (counter->attr.comm && comm_event->event.header.type == PERF_EVENT_COMM) return 1; @@ -2623,11 +2623,11 @@ static void perf_counter_mmap_output(struct perf_counter *counter, static int perf_counter_mmap_match(struct perf_counter *counter, struct perf_mmap_event *mmap_event) { - if (counter->hw_event.mmap && + if (counter->attr.mmap && mmap_event->event.header.type == PERF_EVENT_MMAP) return 1; - if (counter->hw_event.munmap && + if (counter->attr.munmap && mmap_event->event.header.type == PERF_EVENT_MUNMAP) return 1; @@ -2907,8 +2907,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) * In case we exclude kernel IPs or are somehow not in interrupt * context, provide the next best thing, the user IP. */ - if ((counter->hw_event.exclude_kernel || !regs) && - !counter->hw_event.exclude_user) + if ((counter->attr.exclude_kernel || !regs) && + !counter->attr.exclude_user) regs = task_pt_regs(current); if (regs) { @@ -2982,14 +2982,14 @@ static int perf_swcounter_match(struct perf_counter *counter, if (!perf_swcounter_is_counting(counter)) return 0; - if (counter->hw_event.config != event_config) + if (counter->attr.config != event_config) return 0; if (regs) { - if (counter->hw_event.exclude_user && user_mode(regs)) + if (counter->attr.exclude_user && user_mode(regs)) return 0; - if (counter->hw_event.exclude_kernel && !user_mode(regs)) + if (counter->attr.exclude_kernel && !user_mode(regs)) return 0; } @@ -3252,12 +3252,12 @@ extern void ftrace_profile_disable(int); static void tp_perf_counter_destroy(struct perf_counter *counter) { - ftrace_profile_disable(perf_event_id(&counter->hw_event)); + ftrace_profile_disable(perf_event_id(&counter->attr)); } static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { - int event_id = perf_event_id(&counter->hw_event); + int event_id = perf_event_id(&counter->attr); int ret; ret = ftrace_profile_enable(event_id); @@ -3265,7 +3265,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) return NULL; counter->destroy = tp_perf_counter_destroy; - counter->hw.sample_period = counter->hw_event.sample_period; + counter->hw.sample_period = counter->attr.sample_period; return &perf_ops_generic; } @@ -3287,7 +3287,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (perf_event_id(&counter->hw_event)) { + switch (perf_event_id(&counter->attr)) { case PERF_COUNT_CPU_CLOCK: pmu = &perf_ops_cpu_clock; @@ -3319,7 +3319,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(struct perf_counter_hw_event *hw_event, +perf_counter_alloc(struct perf_counter_attr *attr, int cpu, struct perf_counter_context *ctx, struct perf_counter *group_leader, @@ -3352,36 +3352,36 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, mutex_init(&counter->mmap_mutex); counter->cpu = cpu; - counter->hw_event = *hw_event; + counter->attr = *attr; counter->group_leader = group_leader; counter->pmu = NULL; counter->ctx = ctx; counter->oncpu = -1; counter->state = PERF_COUNTER_STATE_INACTIVE; - if (hw_event->disabled) + if (attr->disabled) counter->state = PERF_COUNTER_STATE_OFF; pmu = NULL; hwc = &counter->hw; - if (hw_event->freq && hw_event->sample_freq) - hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq); + if (attr->freq && attr->sample_freq) + hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq); else - hwc->sample_period = hw_event->sample_period; + hwc->sample_period = attr->sample_period; /* * we currently do not support PERF_SAMPLE_GROUP on inherited counters */ - if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP)) + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) goto done; - if (perf_event_raw(hw_event)) { + if (perf_event_raw(attr)) { pmu = hw_perf_counter_init(counter); goto done; } - switch (perf_event_type(hw_event)) { + switch (perf_event_type(attr)) { case PERF_TYPE_HARDWARE: pmu = hw_perf_counter_init(counter); break; @@ -3409,11 +3409,11 @@ done: counter->pmu = pmu; atomic_inc(&nr_counters); - if (counter->hw_event.mmap) + if (counter->attr.mmap) atomic_inc(&nr_mmap_tracking); - if (counter->hw_event.munmap) + if (counter->attr.munmap) atomic_inc(&nr_munmap_tracking); - if (counter->hw_event.comm) + if (counter->attr.comm) atomic_inc(&nr_comm_tracking); return counter; @@ -3424,17 +3424,17 @@ static atomic64_t perf_counter_id; /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * - * @hw_event_uptr: event type attributes for monitoring/sampling + * @attr_uptr: event type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu * @group_fd: group leader counter fd */ SYSCALL_DEFINE5(perf_counter_open, - const struct perf_counter_hw_event __user *, hw_event_uptr, + const struct perf_counter_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; - struct perf_counter_hw_event hw_event; + struct perf_counter_attr attr; struct perf_counter_context *ctx; struct file *counter_file = NULL; struct file *group_file = NULL; @@ -3446,7 +3446,7 @@ SYSCALL_DEFINE5(perf_counter_open, if (flags) return -EINVAL; - if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) + if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) return -EFAULT; /* @@ -3484,11 +3484,11 @@ SYSCALL_DEFINE5(perf_counter_open, /* * Only a group leader can be exclusive or pinned */ - if (hw_event.exclusive || hw_event.pinned) + if (attr.exclusive || attr.pinned) goto err_put_context; } - counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, + counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, GFP_KERNEL); ret = PTR_ERR(counter); if (IS_ERR(counter)) @@ -3556,7 +3556,7 @@ inherit_counter(struct perf_counter *parent_counter, if (parent_counter->parent) parent_counter = parent_counter->parent; - child_counter = perf_counter_alloc(&parent_counter->hw_event, + child_counter = perf_counter_alloc(&parent_counter->attr, parent_counter->cpu, child_ctx, group_leader, GFP_KERNEL); if (IS_ERR(child_counter)) @@ -3565,7 +3565,7 @@ inherit_counter(struct perf_counter *parent_counter, /* * Make the child state follow the state of the parent counter, - * not its hw_event.disabled bit. We hold the parent's mutex, + * not its attr.disabled bit. We hold the parent's mutex, * so we won't race with perf_counter_{en, dis}able_family. */ if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) @@ -3582,7 +3582,7 @@ inherit_counter(struct perf_counter *parent_counter, /* * inherit into child's child as well: */ - child_counter->hw_event.inherit = 1; + child_counter->attr.inherit = 1; /* * Get a reference to the parent filp - we will fput it @@ -3838,7 +3838,7 @@ int perf_counter_init_task(struct task_struct *child) if (counter != counter->group_leader) continue; - if (!counter->hw_event.inherit) { + if (!counter->attr.inherit) { inherited_all = 0; continue; } -- cgit v1.2.3-70-g09d2 From a05c0205ba031c01bba33a21bf0a35920eb64833 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 3 Jun 2009 09:33:18 +0200 Subject: block: Fix bounce limit setting in DM blk_queue_bounce_limit() is more than a wrapper about the request queue limits.bounce_pfn variable. Introduce blk_queue_bounce_pfn() which can be called by stacking drivers that wish to set the bounce limit explicitly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 17 +++++++++++++++++ drivers/md/dm-table.c | 2 +- include/linux/blkdev.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 8d339349289..9acd0b7e802 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -193,6 +193,23 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) } EXPORT_SYMBOL(blk_queue_bounce_limit); +/** + * blk_queue_bounce_pfn - set the bounce buffer limit for queue + * @q: the request queue for the device + * @pfn: max address + * + * Description: + * This function is similar to blk_queue_bounce_limit except it + * neither changes allocation flags, nor does it set up the ISA DMA + * pool. This function should only be used by stacking drivers. + * Hardware drivers should use blk_queue_bounce_limit instead. + */ +void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn) +{ + q->limits.bounce_pfn = pfn; +} +EXPORT_SYMBOL(blk_queue_bounce_pfn); + /** * blk_queue_max_sectors - set max sectors for a request for this queue * @q: the request queue for the device diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e9a73bb242b..3ca1604ddd5 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) blk_queue_max_segment_size(q, t->limits.max_segment_size); blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); - blk_queue_bounce_limit(q, t->limits.bounce_pfn); + blk_queue_bounce_pfn(q, t->limits.bounce_pfn); if (t->limits.no_cluster) queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e740a135e7..989aa1790f4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,6 +910,7 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); +extern void blk_queue_bounce_pfn(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); -- cgit v1.2.3-70-g09d2 From e34d5c1a4f9919a81b4ea4591d7383245f35cb8e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 3 Jun 2009 10:32:06 +0200 Subject: netfilter: conntrack: replace notify chain by function pointer This patch removes the notify chain infrastructure and replace it by a simple function pointer. This issue has been mentioned in the mailing list several times: the use of the notify chain adds too much overhead for something that is only used by ctnetlink. This patch also changes nfnetlink_send(). It seems that gfp_any() returns GFP_KERNEL for user-context request, like those via ctnetlink, inside the RCU read-side section which is not valid. Using GFP_KERNEL is also evil since netlink may schedule(), this leads to "scheduling while atomic" bug reports. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 68 ++++++++++++++++------- net/netfilter/nf_conntrack_ecache.c | 83 +++++++++++++++++++++++------ net/netfilter/nf_conntrack_netlink.c | 42 +++++++-------- net/netfilter/nfnetlink.c | 5 +- 5 files changed, 139 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index c600083cbdf..2214e516146 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -75,7 +75,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, - int echo); + int echo, gfp_t flags); extern void nfnetlink_set_err(u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 2e17a2d0eb3..1afb907e015 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -6,7 +6,6 @@ #define _NF_CONNTRACK_ECACHE_H #include -#include #include #include #include @@ -69,9 +68,13 @@ struct nf_ct_event { int report; }; -extern struct atomic_notifier_head nf_conntrack_chain; -extern int nf_conntrack_register_notifier(struct notifier_block *nb); -extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); +struct nf_ct_event_notifier { + int (*fcn)(unsigned int events, struct nf_ct_event *item); +}; + +extern struct nf_ct_event_notifier *nf_conntrack_event_cb; +extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); +extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); extern void __nf_ct_event_cache_init(struct nf_conn *ct); @@ -97,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event, u32 pid, int report) { - struct nf_ct_event item = { - .ct = ct, - .pid = pid, - .report = report - }; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - atomic_notifier_call_chain(&nf_conntrack_chain, event, &item); + struct nf_ct_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + struct nf_ct_event item = { + .ct = ct, + .pid = pid, + .report = report + }; + notify->fcn(event, &item); + } +out_unlock: + rcu_read_unlock(); } static inline void @@ -118,9 +131,13 @@ struct nf_exp_event { int report; }; -extern struct atomic_notifier_head nf_ct_expect_chain; -extern int nf_ct_expect_register_notifier(struct notifier_block *nb); -extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); +struct nf_exp_event_notifier { + int (*fcn)(unsigned int events, struct nf_exp_event *item); +}; + +extern struct nf_exp_event_notifier *nf_expect_event_cb; +extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); +extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, @@ -128,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct nf_exp_event item = { - .exp = exp, - .pid = pid, - .report = report - }; - atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item); + struct nf_exp_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_expect_event_cb); + if (notify == NULL) + goto out_unlock; + + { + struct nf_exp_event item = { + .exp = exp, + .pid = pid, + .report = report + }; + notify->fcn(event, &item); + } +out_unlock: + rcu_read_unlock(); } static inline void diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index dee4190209c..5516b3e64b4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,24 +16,32 @@ #include #include #include -#include #include #include #include #include -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_chain); +static DEFINE_MUTEX(nf_ct_ecache_mutex); -ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); -EXPORT_SYMBOL_GPL(nf_ct_expect_chain); +struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); + +struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) { + struct nf_ct_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) && ecache->events) { struct nf_ct_event item = { @@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) .report = 0 }; - atomic_notifier_call_chain(&nf_conntrack_chain, - ecache->events, - &item); + notify->fcn(ecache->events, &item); } ecache->events = 0; nf_ct_put(ecache->ct); ecache->ct = NULL; + +out_unlock: + rcu_read_unlock(); } /* Deliver all cached events for a particular conntrack. This is called @@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net) free_percpu(net->ct.ecache); } -int nf_conntrack_register_notifier(struct notifier_block *nb) +int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_register(&nf_conntrack_chain, nb); + int ret = 0; + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_conntrack_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -int nf_conntrack_unregister_notifier(struct notifier_block *nb) +void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb); + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_conntrack_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct notifier_block *nb) +int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); + int ret = 0; + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_expect_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_ct_expect_unregister_notifier(struct notifier_block *nb) +void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_expect_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b1b9e4fb7de..4448b062de0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -454,13 +453,12 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) ; } -static int ctnetlink_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_ct_event *item = (struct nf_ct_event *)ptr; struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; @@ -468,7 +466,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* ignore our fake conntrack entry */ if (ct == &nf_conntrack_untracked) - return NOTIFY_DONE; + return 0; if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; @@ -481,10 +479,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(group)) - return NOTIFY_DONE; + return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); if (skb == NULL) @@ -560,8 +558,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, group, item->report); - return NOTIFY_DONE; + nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); @@ -570,7 +568,7 @@ nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, group, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1507,12 +1505,11 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static int ctnetlink_expect_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_exp_event *item = (struct nf_exp_event *)ptr; struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; @@ -1522,11 +1519,11 @@ static int ctnetlink_expect_event(struct notifier_block *this, type = IPCTNL_MSG_EXP_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; + return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (skb == NULL) @@ -1548,8 +1545,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); - return NOTIFY_DONE; + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); @@ -1558,7 +1556,7 @@ nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, 0, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif static int ctnetlink_exp_done(struct netlink_callback *cb) @@ -1864,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static struct notifier_block ctnl_notifier = { - .notifier_call = ctnetlink_conntrack_event, +static struct nf_ct_event_notifier ctnl_notifier = { + .fcn = ctnetlink_conntrack_event, }; -static struct notifier_block ctnl_notifier_exp = { - .notifier_call = ctnetlink_expect_event, +static struct nf_exp_event_notifier ctnl_notifier_exp = { + .fcn = ctnetlink_expect_event, }; #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9dbd5709aad..92761a98837 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int nfnetlink_send(struct sk_buff *skb, u32 pid, + unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); + return nlmsg_notify(nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -- cgit v1.2.3-70-g09d2 From dfbf97f3ac980b69dfbc41c83a208211a38443e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jun 2009 05:13:45 +0000 Subject: net: add _skb_dst opaque field struct sk_buff uses one union to define dst and rtable fields. We want to replace direct access to these pointers by accessors. First patch adds a new "unsigned long _skb_dst;" opaque field in this union. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aff494ba6a3..d4d7c666ca6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -325,6 +325,7 @@ struct sk_buff { union { struct dst_entry *dst; struct rtable *rtable; + unsigned long _skb_dst; }; #ifdef CONFIG_XFRM struct sec_path *sp; -- cgit v1.2.3-70-g09d2 From 511c3f92ad5b6d9f8f6464be1b4f85f0422be91a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jun 2009 05:14:27 +0000 Subject: net: skb->rtable accessor Define skb_rtable(const struct sk_buff *skb) accessor to get rtable from skb Delete skb->rtable field Setting rtable is not allowed, just set dst instead as rtable is an alias. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/appletalk/ipddp.c | 2 +- include/linux/skbuff.h | 6 +++++- include/net/route.h | 2 +- net/bridge/br_netfilter.c | 25 +++++++++++++--------- net/dccp/ipv4.c | 6 +++--- net/ipv4/arp.c | 4 ++-- net/ipv4/icmp.c | 10 ++++----- net/ipv4/igmp.c | 2 +- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_options.c | 16 +++++++------- net/ipv4/ip_output.c | 10 ++++----- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 4 ++-- net/ipv4/route.c | 37 ++++++++++++++++++++------------- net/ipv4/tcp_ipv4.c | 4 ++-- net/netfilter/nf_conntrack_netbios_ns.c | 2 +- net/sched/em_meta.c | 4 ++-- net/sctp/protocol.c | 8 +++---- 23 files changed, 89 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index 9832b757f10..78cea5e80b1 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -115,7 +115,7 @@ static struct net_device * __init ipddp_init(void) */ static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev) { - __be32 paddr = ((struct rtable*)skb->dst)->rt_gateway; + __be32 paddr = skb_rtable(skb)->rt_gateway; struct ddpehdr *ddp; struct ipddp_route *rt; struct atalk_addr *our_addr; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d4d7c666ca6..a3ae3c52583 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -324,7 +324,6 @@ struct sk_buff { union { struct dst_entry *dst; - struct rtable *rtable; unsigned long _skb_dst; }; #ifdef CONFIG_XFRM @@ -427,6 +426,11 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, enum dma_data_direction dir); #endif +static inline struct rtable *skb_rtable(const struct sk_buff *skb) +{ + return (struct rtable *)skb->_skb_dst; +} + extern void kfree_skb(struct sk_buff *skb); extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); diff --git a/include/net/route.h b/include/net/route.h index 4e8cae0e584..40f6346ef49 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -210,7 +210,7 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) static inline int inet_iif(const struct sk_buff *skb) { - return skb->rtable->rt_iif; + return skb_rtable(skb)->rt_iif; } #endif /* _ROUTE_H */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index e4a418fcb35..e0ceb66a9ec 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -228,6 +228,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct rtable *rt; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; @@ -235,12 +236,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->rtable = bridge_parent_rtable(nf_bridge->physindev); - if (!skb->rtable) { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { kfree_skb(skb); return 0; } - dst_hold(&skb->rtable->u.dst); + dst_hold(&rt->u.dst); + skb->dst = &rt->u.dst; skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); @@ -338,6 +340,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct rtable *rt; int err; if (nf_bridge->mask & BRNF_PKT_TYPE) { @@ -347,7 +350,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { - struct rtable *rt; struct flowi fl = { .nl_u = { .ip4_u = { @@ -404,12 +406,13 @@ bridged_dnat: skb->pkt_type = PACKET_HOST; } } else { - skb->rtable = bridge_parent_rtable(nf_bridge->physindev); - if (!skb->rtable) { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { kfree_skb(skb); return 0; } - dst_hold(&skb->rtable->u.dst); + dst_hold(&rt->u.dst); + skb->dst = &rt->u.dst; } skb->dev = nf_bridge->physindev; @@ -628,9 +631,11 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (skb->rtable && skb->rtable == bridge_parent_rtable(in)) { - dst_release(&skb->rtable->u.dst); - skb->rtable = NULL; + struct rtable *rt = skb_rtable(skb); + + if (rt && rt == bridge_parent_rtable(in)) { + dst_release(&rt->u.dst); + skb->dst = NULL; } return NF_ACCEPT; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d1dd95289b8..2cf48ba0dbb 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -452,7 +452,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb->rtable->rt_iif, + struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, .nl_u = { .ip4_u = { .daddr = ip_hdr(skb)->saddr, .saddr = ip_hdr(skb)->daddr, @@ -514,7 +514,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; - if (rxskb->rtable->rt_type != RTN_LOCAL) + if (skb_rtable(rxskb)->rt_type != RTN_LOCAL) return; dst = dccp_v4_route_skb(net, ctl_sk, rxskb); @@ -567,7 +567,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f11931c1838..816494f271a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -474,7 +474,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = skb->rtable->rt_gateway; + paddr = skb_rtable(skb)->rt_gateway; if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; @@ -817,7 +817,7 @@ static int arp_process(struct sk_buff *skb) if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input(skb, tip, sip, 0, dev) == 0) { - rt = skb->rtable; + rt = skb_rtable(skb); addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3f50807237e..94f75efae93 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -356,7 +356,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->u.dst.dev); struct sock *sk; struct inet_sock *inet; @@ -416,7 +416,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct iphdr *iph; int room; struct icmp_bxm icmp_param; - struct rtable *rt = skb_in->rtable; + struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; __be32 saddr; u8 tos; @@ -596,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); - rt2 = skb_in->rtable; + rt2 = skb_rtable(skb_in); skb_in->dst = odst; } @@ -926,7 +926,7 @@ static void icmp_address(struct sk_buff *skb) static void icmp_address_reply(struct sk_buff *skb) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; @@ -970,7 +970,7 @@ static void icmp_discard(struct sk_buff *skb) int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e6058a50379..afabd2758b6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb) case IGMPV2_HOST_MEMBERSHIP_REPORT: case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ - if (skb->rtable->fl.iif == 0) + if (skb_rtable(skb)->fl.iif == 0) break; /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index df3fe50bbf0..0761cd9bbd1 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -81,7 +81,7 @@ int ip_forward(struct sk_buff *skb) if (!xfrm4_route_forward(skb)) goto drop; - rt = skb->rtable; + rt = skb_rtable(skb); if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 77436e2732e..85ddad45a91 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -602,7 +602,7 @@ static int ipgre_rcv(struct sk_buff *skb) #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ - if (skb->rtable->fl.iif == 0) + if (skb_rtable(skb)->fl.iif == 0) goto drop; stats->multicast++; skb->pkt_type = PACKET_BROADCAST; @@ -704,7 +704,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb->protocol == htons(ETH_P_IP)) { - rt = skb->rtable; + rt = skb_rtable(skb); if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 40f6206b2aa..cea784b0aa4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -357,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb) if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; - rt = skb->rtable; + rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, skb->len); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2c88da6e786..7e1074ffdbd 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -102,7 +102,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sptr = skb_network_header(skb); dptr = dopt->__data; - daddr = skb->rtable->rt_spec_dst; + daddr = skb_rtable(skb)->rt_spec_dst; if (sopt->rr) { optlen = sptr[sopt->rr+1]; @@ -257,7 +257,7 @@ int ip_options_compile(struct net *net, struct rtable *rt = NULL; if (skb != NULL) { - rt = skb->rtable; + rt = skb_rtable(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else optptr = opt->__data; @@ -550,7 +550,7 @@ void ip_forward_options(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); unsigned char * optptr; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); unsigned char *raw = skb_network_header(skb); if (opt->rr_needaddr) { @@ -598,7 +598,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) __be32 nexthop; struct iphdr *iph = ip_hdr(skb); unsigned char *optptr = skb_network_header(skb) + opt->srr; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct rtable *rt2; int err; @@ -623,13 +623,13 @@ int ip_options_rcv_srr(struct sk_buff *skb) } memcpy(&nexthop, &optptr[srrptr-1], 4); - rt = skb->rtable; - skb->rtable = NULL; + rt = skb_rtable(skb); + skb->dst = NULL; err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); - rt2 = skb->rtable; + rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { ip_rt_put(rt2); - skb->rtable = rt; + skb->dst = &rt->u.dst; return -EINVAL; } ip_rt_put(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ea19c37ccc0..8d845ebfcca 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -140,7 +140,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options *opt) { struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct iphdr *iph; /* Build the IP header. */ @@ -238,7 +238,7 @@ static int ip_finish_output(struct sk_buff *skb) int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->u.dst.dev; /* @@ -319,7 +319,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. */ - rt = skb->rtable; + rt = skb_rtable(skb); if (rt != NULL) goto packet_routed; @@ -440,7 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); int err = 0; dev = rt->u.dst.dev; @@ -1362,7 +1362,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } replyopts; struct ipcm_cookie ipc; __be32 daddr; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); if (ip_options_echo(&replyopts.opt, skb)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cb49936856e..fc7993e9061 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -57,7 +57,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { struct in_pktinfo info; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; if (rt) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index bb2f1b17fbf..0c6e7bf18a4 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -416,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ - if ((rt = skb->rtable) == NULL) { + if ((rt = skb_rtable(skb)) == NULL) { stats->tx_fifo_errors++; goto tx_error; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 13e9dd3012b..69dd058283e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1354,7 +1354,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (net->ipv4.vif_table[vif].dev != skb->dev) { int true_vifi; - if (skb->rtable->fl.iif == 0) { + if (skb_rtable(skb)->fl.iif == 0) { /* It is our own packet, looped back. Very complicated situation... @@ -1430,7 +1430,7 @@ int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; struct net *net = dev_net(skb->dev); - int local = skb->rtable->rt_flags&RTCF_LOCAL; + int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; /* Packet is looped back after forward, it should not be forwarded second time, but still can be delivered locally. @@ -1646,7 +1646,7 @@ int ipmr_get_route(struct net *net, { int err; struct mfc_cache *cache; - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); read_lock(&mrt_lock); cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index f389f60cb10..c0992c75bda 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_ACCEPT; mr = par->targinfo; - rt = skb->rtable; + rt = skb_rtable(skb); newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", par->out->name); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index cf7a42bf982..155c008626c 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -140,7 +140,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; @@ -218,7 +218,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 28205e5bfa9..f20060ac2f0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1064,7 +1064,8 @@ work_done: out: return 0; } -static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) +static int rt_intern_hash(unsigned hash, struct rtable *rt, + struct rtable **rp, struct sk_buff *skb) { struct rtable *rth, **rthp; unsigned long now; @@ -1114,7 +1115,10 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); - *rp = rth; + if (rp) + *rp = rth; + else + skb->dst = &rth->u.dst; return 0; } @@ -1210,7 +1214,10 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); - *rp = rt; + if (rp) + *rp = rt; + else + skb->dst = &rt->u.dst; return 0; } @@ -1407,7 +1414,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, &netevent); rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt)) + if (!rt_intern_hash(hash, rt, &rt, NULL)) ip_rt_put(rt); goto do_next; } @@ -1473,7 +1480,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) void ip_rt_send_redirect(struct sk_buff *skb) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct in_device *in_dev = in_dev_get(rt->u.dst.dev); if (!in_dev) @@ -1521,7 +1528,7 @@ out: static int ip_error(struct sk_buff *skb) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); unsigned long now; int code; @@ -1698,7 +1705,7 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); - rt = skb->rtable; + rt = skb_rtable(skb); if (rt) dst_set_expires(&rt->u.dst, 0); } @@ -1858,7 +1865,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, &skb->rtable); + return rt_intern_hash(hash, rth, NULL, skb); e_nobufs: in_dev_put(in_dev); @@ -2019,7 +2026,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, rt_genid(dev_net(rth->u.dst.dev))); - return rt_intern_hash(hash, rth, &skb->rtable); + return rt_intern_hash(hash, rth, NULL, skb); } /* @@ -2175,7 +2182,7 @@ local_input: } rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, &skb->rtable); + err = rt_intern_hash(hash, rth, NULL, skb); goto done; no_route: @@ -2244,7 +2251,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); - skb->rtable = rth; + skb->dst = &rth->u.dst; return 0; } RT_CACHE_STAT_INC(in_hlist_search); @@ -2420,7 +2427,7 @@ static int ip_mkroute_output(struct rtable **rp, if (err == 0) { hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp); + err = rt_intern_hash(hash, rth, rp, NULL); } return err; @@ -2763,7 +2770,7 @@ static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; long expires; @@ -2907,7 +2914,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = skb->rtable; + rt = skb_rtable(skb); if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { @@ -2927,7 +2934,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb->rtable = rt; + skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc79e341628..319c8852644 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -546,7 +546,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (skb->rtable->rt_type != RTN_LOCAL) + if (skb_rtable(skb)->rt_type != RTN_LOCAL) return; /* Swap the send and the receive. */ @@ -1185,7 +1185,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) #endif /* Never answer to SYNs send to broadcast or multicast */ - if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; /* TW buckets are converted to open requests without diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 8a3875e36ec..497b2224536 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -48,7 +48,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, { struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; __be32 mask = 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index fad596bf32d..b6b588bed4e 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -258,10 +258,10 @@ META_COLLECTOR(int_rtclassid) META_COLLECTOR(int_rtiif) { - if (unlikely(skb->rtable == NULL)) + if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb->rtable->fl.iif; + dst->value = skb_rtable(skb)->fl.iif; } /************************************************************************** diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8eb3e61cb70..cb2c50dbd42 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -393,7 +393,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, return 0; /* Is this a broadcast address? */ - if (skb && skb->rtable->rt_flags & RTCF_BROADCAST) + if (skb && skb_rtable(skb)->rt_flags & RTCF_BROADCAST) return 0; return 1; @@ -572,7 +572,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, /* What interface did this skb arrive on? */ static int sctp_v4_skb_iif(const struct sk_buff *skb) { - return skb->rtable->rt_iif; + return skb_rtable(skb)->rt_iif; } /* Was this packet marked by Explicit Congestion Notification? */ @@ -848,8 +848,8 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, skb->len, - &skb->rtable->rt_src, - &skb->rtable->rt_dst); + &skb_rtable(skb)->rt_src, + &skb_rtable(skb)->rt_dst); inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; -- cgit v1.2.3-70-g09d2 From adf30907d63893e4208dfe3f5c88ae12bc2f25d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jun 2009 05:19:30 +0000 Subject: net: skb->dst accessors Define three accessors to get/set dst attached to a skb struct dst_entry *skb_dst(const struct sk_buff *skb) void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) void skb_dst_drop(struct sk_buff *skb) This one should replace occurrences of : dst_release(skb->dst) skb->dst = NULL; Delete skb->dst field Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 30 ++++++------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10 ++--- drivers/net/pppol2tp.c | 11 +++-- drivers/s390/net/qeth_core_main.c | 4 +- drivers/s390/net/qeth_l3_main.c | 8 ++-- include/linux/skbuff.h | 13 +++++- include/net/dst.h | 12 ++++-- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 2 +- include/net/ip6_route.h | 2 +- include/net/xfrm.h | 4 +- net/atm/br2684.c | 2 +- net/atm/clip.c | 14 +++--- net/bridge/br_netfilter.c | 18 ++++---- net/core/dev.c | 7 ++- net/core/neighbour.c | 11 +++-- net/core/skbuff.c | 4 +- net/dccp/ipv4.c | 4 +- net/dccp/ipv6.c | 8 ++-- net/dccp/output.c | 2 +- net/decnet/af_decnet.c | 6 ++- net/decnet/dn_neigh.c | 8 ++-- net/decnet/dn_nsp_out.c | 6 +-- net/decnet/dn_route.c | 25 +++++------ net/ipv4/arp.c | 2 +- net/ipv4/icmp.c | 10 ++--- net/ipv4/igmp.c | 4 +- net/ipv4/ip_forward.c | 4 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 23 +++++----- net/ipv4/ip_input.c | 6 +-- net/ipv4/ip_options.c | 6 +-- net/ipv4/ip_output.c | 20 ++++----- net/ipv4/ipip.c | 13 +++--- net/ipv4/ipmr.c | 13 +++--- net/ipv4/netfilter.c | 28 ++++++------ net/ipv4/netfilter/ipt_REJECT.c | 7 ++- net/ipv4/netfilter/nf_nat_standalone.c | 7 ++- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 14 +++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/udp.c | 4 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 4 +- net/ipv4/xfrm4_output.c | 6 +-- net/ipv6/exthdrs.c | 40 ++++++++--------- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_input.c | 12 +++--- net/ipv6/ip6_output.c | 60 +++++++++++++------------- net/ipv6/ip6_tunnel.c | 26 ++++++----- net/ipv6/ip6mr.c | 13 +++--- net/ipv6/mcast.c | 17 +++++--- net/ipv6/ndisc.c | 4 +- net/ipv6/netfilter.c | 16 ++++--- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/reassembly.c | 22 +++++----- net/ipv6/route.c | 12 +++--- net/ipv6/sit.c | 21 +++++---- net/ipv6/tcp_ipv6.c | 8 ++-- net/ipv6/udp.c | 7 ++- net/ipv6/xfrm6_mode_tunnel.c | 4 +- net/ipv6/xfrm6_output.c | 4 +- net/netfilter/ipvs/ip_vs_xmit.c | 48 ++++++++++----------- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/xt_TCPMSS.c | 6 +-- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_realm.c | 2 +- net/packet/af_packet.c | 6 +-- net/sched/cls_flow.c | 8 ++-- net/sched/cls_route.c | 2 +- net/sched/em_meta.c | 4 +- net/sched/sch_sfq.c | 2 +- net/sched/sch_teql.c | 6 +-- net/sctp/output.c | 6 +-- net/sunrpc/xprtsock.c | 2 +- net/xfrm/xfrm_input.c | 3 +- net/xfrm/xfrm_output.c | 21 ++++----- net/xfrm/xfrm_policy.c | 8 +++- security/selinux/hooks.c | 2 +- security/selinux/xfrm.c | 2 +- 83 files changed, 414 insertions(+), 390 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4248c313936..181b1f32325 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1394,8 +1394,8 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, struct ipoib_dev_priv *priv = netdev_priv(dev); int e = skb_queue_empty(&priv->cm.skb_queue); - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 69c6304cc94..e319d91f60a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -561,7 +561,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); + neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -570,9 +570,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, skb->dst->neighbour->ha + 4); + path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4); if (!path) { - path = path_rec_create(dev, skb->dst->neighbour->ha + 4); + path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4); if (!path) goto err_path; @@ -605,7 +605,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) goto err_drop; } } else - ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); } else { neigh->ah = NULL; @@ -635,15 +635,15 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(skb->dev); /* Look up path record for unicasts */ - if (skb->dst->neighbour->ha[4] != 0xff) { + if (skb_dst(skb)->neighbour->ha[4] != 0xff) { neigh_add_path(skb, dev); return; } /* Add in the P_Key for multicasts */ - skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; - skb->dst->neighbour->ha[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, skb->dst->neighbour->ha + 4, skb); + skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; + skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff; + ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb); } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -708,16 +708,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (likely(skb->dst && skb->dst->neighbour)) { - if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { + if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) { + if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) { ipoib_path_lookup(skb, dev); return NETDEV_TX_OK; } - neigh = *to_ipoib_neigh(skb->dst->neighbour); + neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour); if (unlikely((memcmp(&neigh->dgid.raw, - skb->dst->neighbour->ha + 4, + skb_dst(skb)->neighbour->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { spin_lock_irqsave(&priv->lock, flags); @@ -743,7 +743,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); return NETDEV_TX_OK; } @@ -772,7 +772,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", - skb->dst ? "neigh" : "dst", + skb_dst(skb) ? "neigh" : "dst", be16_to_cpup((__be16 *) skb->data), IPOIB_QPN(phdr->hwaddr), phdr->hwaddr + 4); @@ -817,7 +817,7 @@ static int ipoib_hard_header(struct sk_buff *skb, * destination address onto the front of the skb so we can * figure out where to send the packet later. */ - if ((!skb->dst || !skb->dst->neighbour) && daddr) { + if ((!skb_dst(skb) || !skb_dst(skb)->neighbour) && daddr) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 425e31112ed..a0e97532e71 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -261,7 +261,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, skb->dev = dev; - if (!skb->dst || !skb->dst->neighbour) { + if (!skb_dst(skb) || !skb_dst(skb)->neighbour) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof (struct ipoib_pseudoheader)); } @@ -707,10 +707,10 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - if (skb->dst && - skb->dst->neighbour && - !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, + if (skb_dst(skb) && + skb_dst(skb)->neighbour && + !*to_ipoib_neigh(skb_dst(skb)->neighbour)) { + struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev); if (neigh) { diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 5981debcde5..e7935d09c89 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -433,8 +433,7 @@ static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct s * to the inner packet either */ secpath_reset(skb); - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); po = pppox_sk(session_sock); @@ -976,7 +975,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh /* Calculate UDP checksum if configured to do so */ if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) skb->ip_summed = CHECKSUM_NONE; - else if (!(skb->dst->dev->features & NETIF_F_V4_CSUM)) { + else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_COMPLETE; csum = skb_checksum(skb, 0, udp_len, 0); uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr, @@ -1172,14 +1171,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) nf_reset(skb); /* Get routing info from the tunnel socket */ - dst_release(skb->dst); - skb->dst = dst_clone(__sk_dst_get(sk_tun)); + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun))); pppol2tp_skb_set_owner_w(skb, sk_tun); /* Calculate UDP checksum if configured to do so */ if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) skb->ip_summed = CHECKSUM_NONE; - else if (!(skb->dst->dev->features & NETIF_F_V4_CSUM)) { + else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_COMPLETE; csum = skb_checksum(skb, 0, udp_len, 0); uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2994aa1ed46..74c49d9a8db 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2937,8 +2937,8 @@ int qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb) if (card->info.type == QETH_CARD_TYPE_OSN) return cast_type; - if (skb->dst && skb->dst->neighbour) { - cast_type = skb->dst->neighbour->type; + if (skb_dst(skb) && skb_dst(skb)->neighbour) { + cast_type = skb_dst(skb)->neighbour->type; if ((cast_type == RTN_BROADCAST) || (cast_type == RTN_MULTICAST) || (cast_type == RTN_ANYCAST)) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index cb64b0b534a..6f2386e9d6e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2549,9 +2549,9 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, /* IPv4 */ hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags4(cast_type); memset(hdr->hdr.l3.dest_addr, 0, 12); - if ((skb->dst) && (skb->dst->neighbour)) { + if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) { *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = - *((u32 *) skb->dst->neighbour->primary_key); + *((u32 *) skb_dst(skb)->neighbour->primary_key); } else { /* fill in destination address used in ip header */ *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = @@ -2562,9 +2562,9 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags6(cast_type); if (card->info.type == QETH_CARD_TYPE_IQD) hdr->hdr.l3.flags &= ~QETH_HDR_PASSTHRU; - if ((skb->dst) && (skb->dst->neighbour)) { + if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) { memcpy(hdr->hdr.l3.dest_addr, - skb->dst->neighbour->primary_key, 16); + skb_dst(skb)->neighbour->primary_key, 16); } else { /* fill in destination address used in ip header */ memcpy(hdr->hdr.l3.dest_addr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a3ae3c52583..9ef6eb20247 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -323,7 +323,6 @@ struct sk_buff { struct net_device *dev; union { - struct dst_entry *dst; unsigned long _skb_dst; }; #ifdef CONFIG_XFRM @@ -426,9 +425,19 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, enum dma_data_direction dir); #endif +static inline struct dst_entry *skb_dst(const struct sk_buff *skb) +{ + return (struct dst_entry *)skb->_skb_dst; +} + +static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) +{ + skb->_skb_dst = (unsigned long)dst; +} + static inline struct rtable *skb_rtable(const struct sk_buff *skb) { - return (struct rtable *)skb->_skb_dst; + return (struct rtable *)skb_dst(skb); } extern void kfree_skb(struct sk_buff *skb); diff --git a/include/net/dst.h b/include/net/dst.h index 6be3b082a07..7fc409c19b3 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -195,6 +195,12 @@ struct dst_entry * dst_clone(struct dst_entry * dst) } extern void dst_release(struct dst_entry *dst); +static inline void skb_dst_drop(struct sk_buff *skb) +{ + if (skb->_skb_dst) + dst_release(skb_dst(skb)); + skb->_skb_dst = 0UL; +} /* Children define the path of the packet through the * Linux networking. Thus, destinations are stackable. @@ -246,7 +252,7 @@ static inline void dst_negative_advice(struct dst_entry **dst_p) static inline void dst_link_failure(struct sk_buff *skb) { - struct dst_entry * dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops && dst->ops->link_failure) dst->ops->link_failure(skb); } @@ -265,13 +271,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) /* Output packet to network from transport. */ static inline int dst_output(struct sk_buff *skb) { - return skb->dst->output(skb); + return skb_dst(skb)->output(skb); } /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - return skb->dst->input(skb); + return skb_dst(skb)->input(skb); } static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index f74665d7bea..22c73a77cd9 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -100,7 +100,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, if (unlikely(sk = skb_steal_sock(skb))) return sk; - else return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, + else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), inet6_iif(skb)); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a44e2248b2e..d522dcf3031 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -385,7 +385,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, if (unlikely(sk = skb_steal_sock(skb))) return sk; else - return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, iph->saddr, sport, iph->daddr, dport, inet_iif(skb)); } diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5f53db7e4e5..0e1b8aebaff 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -142,7 +142,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, static inline int ipv6_unicast_destination(struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *) skb->dst; + struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); return rt->rt6i_flags & RTF_LOCAL; } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2e9f5c0018a..736bca45088 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -994,7 +994,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, return __xfrm_policy_check(sk, ndir, skb, family); return (!net->xfrm.policy_count[dir] && !skb->sp) || - (skb->dst->flags & DST_NOPOLICY) || + (skb_dst(skb)->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1048,7 +1048,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct net *net = dev_net(skb->dev); return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb->dst->flags & DST_NOXFRM) || + (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/atm/br2684.c b/net/atm/br2684.c index bfa8fa9894f..2912665fc58 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -228,7 +228,7 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) struct br2684_dev *brdev = BRPRIV(dev); struct br2684_vcc *brvcc; - pr_debug("br2684_start_xmit, skb->dst=%p\n", skb->dst); + pr_debug("br2684_start_xmit, skb_dst(skb)=%p\n", skb_dst(skb)); read_lock(&devs_lock); brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { diff --git a/net/atm/clip.c b/net/atm/clip.c index fb7623c080f..e65a3b1477f 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -369,16 +369,16 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; pr_debug("clip_start_xmit (skb %p)\n", skb); - if (!skb->dst) { - printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); + if (!skb_dst(skb)) { + printk(KERN_ERR "clip_start_xmit: skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return 0; } - if (!skb->dst->neighbour) { + if (!skb_dst(skb)->neighbour) { #if 0 - skb->dst->neighbour = clip_find_neighbour(skb->dst, 1); - if (!skb->dst->neighbour) { + skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1); + if (!skb_dst(skb)->neighbour) { dev_kfree_skb(skb); /* lost that one */ dev->stats.tx_dropped++; return 0; @@ -389,7 +389,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return 0; } - entry = NEIGH2ENTRY(skb->dst->neighbour); + entry = NEIGH2ENTRY(skb_dst(skb)->neighbour); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ @@ -406,7 +406,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) } pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc); if (entry->vccs->encap) { void *here; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index e0ceb66a9ec..d22f611e400 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -242,7 +242,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) return 0; } dst_hold(&rt->u.dst); - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); @@ -322,7 +322,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) skb->dev = bridge_parent(skb->dev); if (skb->dev) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); nf_bridge_pull_encap_header(skb); @@ -375,7 +375,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ if (((struct dst_entry *)rt)->dev == dev) { - skb->dst = (struct dst_entry *)rt; + skb_dst_set(skb, (struct dst_entry *)rt); goto bridged_dnat; } /* we are sure that forwarding is disabled, so printing @@ -389,7 +389,7 @@ free_skb: kfree_skb(skb); return 0; } else { - if (skb->dst->dev == dev) { + if (skb_dst(skb)->dev == dev) { bridged_dnat: /* Tell br_nf_local_out this is a * bridged frame */ @@ -412,7 +412,7 @@ bridged_dnat: return 0; } dst_hold(&rt->u.dst); - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); } skb->dev = nf_bridge->physindev; @@ -633,10 +633,8 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, { struct rtable *rt = skb_rtable(skb); - if (rt && rt == bridge_parent_rtable(in)) { - dst_release(&rt->u.dst); - skb->dst = NULL; - } + if (rt && rt == bridge_parent_rtable(in)) + skb_dst_drop(skb); return NF_ACCEPT; } @@ -851,7 +849,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; #ifdef CONFIG_NETFILTER_DEBUG - if (skb->dst == NULL) { + if (skb_dst(skb) == NULL) { printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); goto print_error; } diff --git a/net/core/dev.c b/net/core/dev.c index e2fcc5f1017..34b49a6a22f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1693,10 +1693,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache */ - if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) { - dst_release(skb->dst); - skb->dst = NULL; - } + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + rc = ops->ndo_start_xmit(skb, dev); if (rc == 0) txq_trans_update(txq); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a1cbce7fdae..c54229befcf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1088,8 +1088,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, struct neighbour *n1 = neigh; write_unlock_bh(&neigh->lock); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb->dst && skb->dst->neighbour) - n1 = skb->dst->neighbour; + if (skb_dst(skb) && skb_dst(skb)->neighbour) + n1 = skb_dst(skb)->neighbour; n1->output(skb); write_lock_bh(&neigh->lock); } @@ -1182,7 +1182,7 @@ EXPORT_SYMBOL(neigh_compat_output); int neigh_resolve_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh; int rc = 0; @@ -1229,7 +1229,7 @@ EXPORT_SYMBOL(neigh_resolve_output); int neigh_connected_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; @@ -1298,8 +1298,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); mod_timer(&tbl->proxy_timer, sched_next); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e815e685f2..6adf19ec95c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -381,7 +381,7 @@ static void kfree_skbmem(struct sk_buff *skb) static void skb_release_head_state(struct sk_buff *skb) { - dst_release(skb->dst); + skb_dst_drop(skb); #ifdef CONFIG_XFRM secpath_put(skb->sp); #endif @@ -521,7 +521,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; - new->dst = dst_clone(old->dst); + skb_dst_set(new, dst_clone(skb_dst(old))); #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2cf48ba0dbb..a0a36c9e6cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -507,7 +507,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) const struct iphdr *rxiph; struct sk_buff *skb; struct dst_entry *dst; - struct net *net = dev_net(rxskb->dst->dev); + struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v4_ctl_sk; /* Never send a reset in response to a reset. */ @@ -528,7 +528,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) rxiph = ip_hdr(rxskb); dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, rxiph->daddr); - skb->dst = dst_clone(dst); + skb_dst_set(skb, dst_clone(dst)); bh_lock_sock(ctl_sk); err = ip_build_and_send_pkt(skb, ctl_sk, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b963f35c65f..05ea7440d9e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -314,8 +314,9 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) struct ipv6hdr *rxip6h; struct sk_buff *skb; struct flowi fl; - struct net *net = dev_net(rxskb->dst->dev); + struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; + struct dst_entry *dst; if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; @@ -342,8 +343,9 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ - if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { + if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { + if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { + skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); diff --git a/net/dccp/output.c b/net/dccp/output.c index 36bcc00654d..c0e88c16d08 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -350,7 +350,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->dst = dst_clone(dst); + skb_dst_set(skb, dst_clone(dst)); dreq = dccp_rsk(req); if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bccb3887773..a5e3a593e47 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1075,6 +1075,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) int err = 0; unsigned char type; long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + struct dst_entry *dst; lock_sock(sk); @@ -1102,8 +1103,9 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) } release_sock(sk); - dst_release(xchg(&newsk->sk_dst_cache, skb->dst)); - skb->dst = NULL; + dst = skb_dst(skb); + dst_release(xchg(&newsk->sk_dst_cache, dst)); + skb_dst_set(skb, NULL); DN_SK(newsk)->state = DN_CR; DN_SK(newsk)->addrrem = cb->src_port; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 05b5aa05e50..923786bd6d0 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -204,7 +204,7 @@ static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb) static int dn_neigh_output_packet(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; @@ -224,7 +224,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) static int dn_long_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; @@ -270,7 +270,7 @@ static int dn_long_output(struct sk_buff *skb) static int dn_short_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -313,7 +313,7 @@ static int dn_short_output(struct sk_buff *skb) */ static int dn_phase3_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index da04f459337..a65e929ce76 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb) dst = sk_dst_check(sk, 0); if (dst) { try_again: - skb->dst = dst; + skb_dst_set(skb, dst); dst_output(skb); return; } @@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, * to be able to send disc packets out which have no socket * associations. */ - skb->dst = dst_clone(dst); + skb_dst_set(skb, dst_clone(dst)); dst_output(skb); } @@ -611,7 +611,7 @@ void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, int ddl = 0; gfp_t gfp = GFP_ATOMIC; - dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, + dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl, NULL, cb->src_port, cb->dst_port); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0cc4394117d..1d6ca8a98dc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -678,7 +678,7 @@ out: static int dn_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; struct net_device *dev = dst->dev; struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -717,7 +717,7 @@ error: static int dn_forward(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = dst->dev->dn_ptr; struct dn_route *rt; struct neighbour *neigh = dst->neighbour; @@ -730,7 +730,7 @@ static int dn_forward(struct sk_buff *skb) goto drop; /* Ensure that we have enough space for headers */ - rt = (struct dn_route *)skb->dst; + rt = (struct dn_route *)skb_dst(skb); header_len = dn_db->use_long ? 21 : 6; if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) goto drop; @@ -1392,7 +1392,8 @@ make_route: goto e_neighbour; hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); - dn_insert_route(rt, hash, (struct dn_route **)&skb->dst); + dn_insert_route(rt, hash, &rt); + skb_dst_set(skb, &rt->u.dst); done: if (neigh) @@ -1424,7 +1425,7 @@ static int dn_route_input(struct sk_buff *skb) struct dn_skb_cb *cb = DN_SKB_CB(skb); unsigned hash = dn_hash(cb->src, cb->dst); - if (skb->dst) + if (skb_dst(skb)) return 0; rcu_read_lock(); @@ -1437,7 +1438,7 @@ static int dn_route_input(struct sk_buff *skb) (rt->fl.iif == cb->iif)) { dst_use(&rt->u.dst, jiffies); rcu_read_unlock(); - skb->dst = (struct dst_entry *)rt; + skb_dst_set(skb, (struct dst_entry *)rt); return 0; } } @@ -1449,7 +1450,7 @@ static int dn_route_input(struct sk_buff *skb) static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { - struct dn_route *rt = (struct dn_route *)skb->dst; + struct dn_route *rt = (struct dn_route *)skb_dst(skb); struct rtmsg *r; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); @@ -1554,7 +1555,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void err = dn_route_input(skb); local_bh_enable(); memset(cb, 0, sizeof(struct dn_skb_cb)); - rt = (struct dn_route *)skb->dst; + rt = (struct dn_route *)skb_dst(skb); if (!err && -rt->u.dst.error) err = rt->u.dst.error; } else { @@ -1570,7 +1571,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb->dev = NULL; if (err) goto out_free; - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -1622,15 +1623,15 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.dn_next), idx++) { if (idx < s_idx) continue; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; } - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); } rcu_read_unlock_bh(); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 816494f271a..8a3881e28ac 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -468,7 +468,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) __be32 paddr; struct neighbour *n; - if (!skb->dst) { + if (!skb_dst(skb)) { printk(KERN_DEBUG "arp_find is called with dst==NULL\n"); kfree_skb(skb); return 1; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 94f75efae93..97c410e8438 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -591,13 +591,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) goto relookup_failed; /* Ugh! */ - odst = skb_in->dst; + odst = skb_dst(skb_in); err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); rt2 = skb_rtable(skb_in); - skb_in->dst = odst; + skb_dst_set(skb_in, odst); } if (err) @@ -659,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb) u32 info = 0; struct net *net; - net = dev_net(skb->dst->dev); + net = dev_net(skb_dst(skb)->dev); /* * Incomplete header ? @@ -822,7 +822,7 @@ static void icmp_echo(struct sk_buff *skb) { struct net *net; - net = dev_net(skb->dst->dev); + net = dev_net(skb_dst(skb)->dev); if (!net->ipv4.sysctl_icmp_echo_ignore_all) { struct icmp_bxm icmp_param; @@ -873,7 +873,7 @@ static void icmp_timestamp(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); goto out; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index afabd2758b6..01b4284ed69 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -311,7 +311,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); skb->dev = dev; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -659,7 +659,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); skb_reserve(skb, LL_RESERVED_SPACE(dev)); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0761cd9bbd1..a2991bc8e32 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -123,7 +123,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7985346653b..1f1b82475ea 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -573,7 +573,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) struct ipq *qp; struct net *net; - net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev); + net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 85ddad45a91..44e2a3d2359 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -643,8 +643,7 @@ static int ipgre_rcv(struct sk_buff *skb) stats->rx_packets++; stats->rx_bytes += len; skb->dev = tunnel->dev; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); skb_reset_network_header(skb); @@ -698,7 +697,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if ((dst = tiph->daddr) == 0) { /* NBMA tunnel */ - if (skb->dst == NULL) { + if (skb_dst(skb) == NULL) { stats->tx_fifo_errors++; goto tx_error; } @@ -712,7 +711,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb->dst->neighbour; + struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; @@ -766,10 +765,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (df) mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; else - mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); @@ -783,14 +782,14 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb->dst; + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { if ((tunnel->parms.iph.daddr && !ipv4_is_multicast(tunnel->parms.iph.daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - skb->dst->metrics[RTAX_MTU-1] = mtu; + skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; } } @@ -837,8 +836,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cea784b0aa4..490ce20faf3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -329,7 +329,7 @@ static int ip_rcv_finish(struct sk_buff *skb) * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ - if (skb->dst == NULL) { + if (skb_dst(skb) == NULL) { int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev); if (unlikely(err)) { @@ -344,9 +344,9 @@ static int ip_rcv_finish(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ROUTE - if (unlikely(skb->dst->tclassid)) { + if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); - u32 idx = skb->dst->tclassid; + u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 7e1074ffdbd..94bf105ef3c 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -143,7 +143,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) __be32 addr; memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) { + if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { dopt->ts_needtime = 1; soffset += 8; } @@ -624,12 +624,12 @@ int ip_options_rcv_srr(struct sk_buff *skb) memcpy(&nexthop, &optptr[srrptr-1], 4); rt = skb_rtable(skb); - skb->dst = NULL; + skb_dst_set(skb, NULL); err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { ip_rt_put(rt2); - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); return -EINVAL; } ip_rt_put(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d845ebfcca..3d6167fb2d9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -95,7 +95,7 @@ int __ip_local_out(struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, + return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); } @@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!newskb->dst); + WARN_ON(!skb_dst(newskb)); netif_rx(newskb); return 0; } @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); @@ -217,14 +217,14 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb) struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? - skb->dst->dev->mtu : dst_mtu(skb->dst); + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb->dst->xfrm != NULL) { + if (skb_dst(skb)->xfrm != NULL) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(skb); } @@ -296,7 +296,7 @@ int ip_mc_output(struct sk_buff *skb) int ip_output(struct sk_buff *skb) { - struct net_device *dev = skb->dst->dev; + struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); @@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) } sk_setup_caps(sk, &rt->u.dst); } - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -401,8 +401,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; - dst_release(to->dst); - to->dst = dst_clone(from->dst); + skb_dst_drop(to); + skb_dst_set(to, dst_clone(skb_dst(from))); to->dev = from->dev; to->mark = from->mark; @@ -1294,7 +1294,7 @@ int ip_push_pending_frames(struct sock *sk) * on dst refcount */ inet->cork.dst = NULL; - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 0c6e7bf18a4..93e2b787da2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -370,8 +370,7 @@ static int ipip_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_packets++; tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); ipip_ecn_decapsulate(iph, skb); netif_rx(skb); @@ -447,15 +446,15 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tiph->frag_off) mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); else - mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (mtu < 68) { stats->collisions++; ip_rt_put(rt); goto tx_error; } - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); df |= (old_iph->frag_off&htons(IP_DF)); @@ -502,8 +501,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 69dd058283e..ffd98610446 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -651,7 +651,7 @@ static int ipmr_cache_report(struct net *net, ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; - skb->dst = dst_clone(pkt->dst); + skb_dst_set(skb, dst_clone(skb_dst(pkt))); /* * Add our header @@ -1201,7 +1201,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(iph, skb->dst, NULL); + ip_select_ident(iph, skb_dst(skb), NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1212,7 +1212,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1290,8 +1290,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) vif->pkt_out++; vif->bytes_out += skb->len; - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. @@ -1543,8 +1543,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) skb->protocol = htons(ETH_P_IP); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); reg_dev->stats.rx_bytes += skb->len; reg_dev->stats.rx_packets++; nf_reset(skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index fdf6811c31a..1725dc0ef68 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,7 +12,7 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; @@ -41,8 +41,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) return -1; /* Drop old route. */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -50,7 +50,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (ip_route_output_key(net, &rt, &fl) != 0) return -1; - odst = skb->dst; + odst = skb_dst(skb); if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); @@ -60,18 +60,22 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) dst_release(odst); } - if (skb->dst->error) + if (skb_dst(skb)->error) return -1; #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) + xfrm_decode_session(skb, &fl, AF_INET) == 0) { + struct dst_entry *dst = skb_dst(skb); + skb_dst_set(skb, NULL); + if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) return -1; + skb_dst_set(skb, dst); + } #endif /* Change in oif may mean change in hh_len. */ - hh_len = skb->dst->dev->hard_header_len; + hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) return -1; @@ -92,7 +96,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) if (xfrm_decode_session(skb, &fl, AF_INET) < 0) return -1; - dst = skb->dst; + dst = skb_dst(skb); if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); @@ -100,11 +104,11 @@ int ip_xfrm_me_harder(struct sk_buff *skb) if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; - dst_release(skb->dst); - skb->dst = dst; + skb_dst_drop(skb); + skb_dst_set(skb, dst); /* Change in oif may mean change in hh_len. */ - hh_len = skb->dst->dev->hard_header_len; + hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) return -1; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 0b4b6e0ff2b..c93ae44bff2 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -108,17 +108,16 @@ static void send_reset(struct sk_buff *oldskb, int hook) addr_type = RTN_LOCAL; /* ip_route_me_harder expects skb->dst to be set */ - dst_hold(oldskb->dst); - nskb->dst = oldskb->dst; + skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; - niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); + niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); nskb->ip_summed = CHECKSUM_NONE; /* "Never happens" */ - if (nskb->len > dst_mtu(nskb->dst)) + if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; nf_ct_attach(nskb, oldskb); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index b7dd695691a..5567bd0d075 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -167,10 +167,9 @@ nf_nat_in(unsigned int hooknum, ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(skb)->daddr) { - dst_release(skb->dst); - skb->dst = NULL; - } + daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + return ret; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f774651f0a4..3dc9171a272 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -343,7 +343,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); skb_reset_network_header(skb); iph = ip_hdr(skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f20060ac2f0..a849bb15d86 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1118,7 +1118,7 @@ restart: if (rp) *rp = rth; else - skb->dst = &rth->u.dst; + skb_dst_set(skb, &rth->u.dst); return 0; } @@ -1217,7 +1217,7 @@ restart: if (rp) *rp = rt; else - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); return 0; } @@ -2251,7 +2251,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); - skb->dst = &rth->u.dst; + skb_dst_set(skb, &rth->u.dst); return 0; } RT_CACHE_STAT_INC(in_hlist_search); @@ -2934,7 +2934,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -2975,15 +2975,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (rt_is_expired(rt)) continue; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; } - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); } rcu_read_unlock_bh(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 319c8852644..5a1ca2698c8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -590,7 +590,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; - net = dev_net(skb->dst->dev); + net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); @@ -617,7 +617,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 79c39dc9b01..416fc4c2e7e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2202,7 +2202,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); - skb->dst = dst_clone(dst); + skb_dst_set(skb, dst_clone(dst)); mss = dst_metric(dst, RTAX_ADVMSS); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7a1d1ce22e6..8f4158d7c9a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -328,7 +328,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, if (unlikely(sk = skb_steal_sock(skb))) return sk; else - return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), udptable); } @@ -1237,7 +1237,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct sock *sk; struct udphdr *uh; unsigned short ulen; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb_rtable(skb); __be32 saddr, daddr; struct net *net = dev_net(skb->dev); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 4ec2162a437..f9f922a0ba8 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,7 +23,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) { - if (skb->dst == NULL) { + if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 7135279f3f8..3444f3b34ec 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -28,7 +28,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) */ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct iphdr *top_iph; int flags; @@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; - top_iph->protocol = xfrm_af2proto(skb->dst->ops->family); + top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 8c3180adddb..c908bd99bcb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -29,7 +29,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - dst = skb->dst; + dst = skb_dst(skb); mtu = dst_mtu(dst); if (skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -72,7 +72,7 @@ EXPORT_SYMBOL(xfrm4_prepare_output); static int xfrm4_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER - if (!skb->dst->xfrm) { + if (!skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(skb); } @@ -87,6 +87,6 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, - NULL, skb->dst->dev, xfrm4_output_finish, + NULL, skb_dst(skb)->dev, xfrm4_output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1c7f400a3cf..4aae658e550 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -277,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -288,7 +288,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) dstbuf = opt->dst1; #endif - dst = dst_clone(skb->dst); + dst = dst_clone(skb_dst(skb)); if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { dst_release(dst); skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; @@ -333,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -343,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -358,7 +358,7 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -384,7 +384,7 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -403,7 +403,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - @@ -417,7 +417,7 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; @@ -440,13 +440,13 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -458,7 +458,7 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -468,17 +468,17 @@ looped_back: ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr); ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr); - dst_release(xchg(&skb->dst, NULL)); + skb_dst_drop(skb); ip6_route_input(skb); - if (skb->dst->error) { + if (skb_dst(skb)->error) { skb_push(skb, skb->data - skb_network_header(skb)); dst_input(skb); return -1; } - if (skb->dst->dev->flags&IFF_LOOPBACK) { + if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); @@ -494,7 +494,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -552,11 +552,11 @@ void ipv6_exthdrs_exit(void) **********************************/ /* - * Note: we cannot rely on skb->dst before we assign it in ip6_route_input(). + * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input(). */ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) { - return skb->dst ? ip6_dst_idev(skb->dst) : __in6_dev_get(skb->dev); + return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev); } /* Router Alert as of RFC 2711 */ @@ -581,7 +581,7 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); u32 pkt_len; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3c3732d50c1..cc4797dd832 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -228,7 +228,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) __inet6_csk_dst_store(sk, dst, NULL, NULL); } - skb->dst = dst_clone(dst); + skb_dst_set(skb, dst_clone(dst)); /* Restore final destination back after routing done */ ipv6_addr_copy(&fl.fl6_dst, &np->daddr); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index bc1a920c34a..c3a07d75b5f 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -48,7 +48,7 @@ inline int ip6_rcv_finish( struct sk_buff *skb) { - if (skb->dst == NULL) + if (skb_dst(skb) == NULL) ip6_route_input(skb); return dst_input(skb); @@ -91,7 +91,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji */ - IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex; + IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; @@ -161,7 +161,7 @@ static int ip6_input_finish(struct sk_buff *skb) int nexthdr, raw; u8 hash; struct inet6_dev *idev; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); /* * Parse extension headers @@ -169,7 +169,7 @@ static int ip6_input_finish(struct sk_buff *skb) rcu_read_lock(); resubmit: - idev = ip6_dst_idev(skb->dst); + idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; @@ -242,8 +242,8 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_UPD_PO_STATS_BH(dev_net(skb->dst->dev), - ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCAST, + IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, skb->len); hdr = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 735a2bf4b5f..c8dc8e5a822 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -78,7 +78,7 @@ int __ip6_local_out(struct sk_buff *skb) len = 0; ipv6_hdr(skb)->payload_len = htons(len); - return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, + return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); } @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(ip6_local_out); static int ip6_output_finish(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); if (dst->hh) return neigh_hh_output(dst->hh, skb); @@ -117,7 +117,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!newskb->dst); + WARN_ON(!skb_dst(newskb)); netif_rx(newskb); return 0; @@ -126,7 +126,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) static int ip6_output2(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); @@ -134,7 +134,7 @@ static int ip6_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && ((mroute6_socket(dev_net(dev)) && @@ -172,21 +172,21 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb->dst->dev->mtu : dst_mtu(skb->dst); + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } int ip6_output(struct sk_buff *skb) { - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(skb->dst->dev), idev, + IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb->dst)) + dst_allfrag(skb_dst(skb))) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -202,7 +202,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; @@ -222,7 +222,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; @@ -276,7 +276,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst), + IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -286,7 +286,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -416,7 +416,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb) int ip6_forward(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); @@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } - dst = skb->dst; + dst = skb_dst(skb); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. @@ -566,8 +566,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; - dst_release(to->dst); - to->dst = dst_clone(from->dst); + skb_dst_drop(to); + skb_dst_set(to, dst_clone(skb_dst(from))); to->dev = from->dev; to->mark = from->mark; @@ -624,7 +624,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info*)skb->dst; + struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; @@ -632,7 +632,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -644,9 +644,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * check should be redundant, but it's free.) */ if (!skb->local_df) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -696,7 +696,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } @@ -809,7 +809,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; @@ -873,16 +873,16 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGCREATES); } - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; @@ -1516,10 +1516,10 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); @@ -1545,8 +1545,8 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - if (skb->dst) - IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), + if (skb_dst(skb)) + IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index af256d47fd3..404d16a97d5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -532,8 +532,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!skb2) return 0; - dst_release(skb2->dst); - skb2->dst = NULL; + skb_dst_drop(skb2); + skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); @@ -560,21 +560,21 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip_rt_put(rt); goto out; } - skb2->dst = (struct dst_entry *)rt; + skb_dst_set(skb2, (struct dst_entry *)rt); } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) goto out; } /* change mtu on this route */ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) + if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -606,8 +606,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!skb2) return 0; - dst_release(skb2->dst); - skb2->dst = NULL; + skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); @@ -720,8 +719,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, skb->pkt_type = PACKET_HOST; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); skb->dev = t->dev; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); dscp_ecn_decapsulate(t, ipv6h, skb); @@ -885,8 +883,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; @@ -910,8 +908,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, kfree_skb(skb); skb = new_skb; } - dst_release(skb->dst); - skb->dst = dst_clone(dst); + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(dst)); skb->transport_header = skb->network_header; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 228be551e9c..a35d8fc55b0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -398,10 +398,9 @@ static int pim6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; - dst_release(skb->dst); + skb_dst_drop(skb); reg_dev->stats.rx_bytes += skb->len; reg_dev->stats.rx_packets++; - skb->dst = NULL; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -849,7 +848,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); - skb->dst = dst_clone(pkt->dst); + skb_dst_set(skb, dst_clone(skb_dst(pkt))); skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -1487,7 +1486,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct sk_buff *skb) { - IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); return dst_output(skb); } @@ -1532,8 +1531,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) if (!dst) goto out_free; - dst_release(skb->dst); - skb->dst = dst; + skb_dst_drop(skb); + skb_dst_set(skb, dst); /* * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally @@ -1722,7 +1721,7 @@ int ip6mr_get_route(struct net *net, { int err; struct mfc6_cache *cache; - struct rt6_info *rt = (struct rt6_info *)skb->dst; + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); read_lock(&mrt_lock); cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4b48819a5b8..4b264ed40a8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1448,6 +1448,7 @@ static void mld_sendpack(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int err; struct flowi fl; + struct dst_entry *dst; IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); @@ -1459,9 +1460,9 @@ static void mld_sendpack(struct sk_buff *skb) IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), mldlen, 0)); - skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); - if (!skb->dst) { + if (!dst) { err = -ENOMEM; goto err_out; } @@ -1470,7 +1471,8 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); + skb_dst_set(skb, dst); if (err) goto err_out; @@ -1775,6 +1777,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; struct flowi fl; + struct dst_entry *dst; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -1828,8 +1831,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) idev = in6_dev_get(skb->dev); - skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); - if (!skb->dst) { + dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + if (!dst) { err = -ENOMEM; goto err_out; } @@ -1838,11 +1841,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) goto err_out; - + skb_dst_set(skb, dst); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, dst_output); out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1d13d996498..9eb68e92cc1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -530,7 +530,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - skb->dst = dst; + skb_dst_set(skb, dst); idev = in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); @@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, len, IPPROTO_ICMPV6, csum_partial(icmph, len, 0)); - buff->dst = dst; + skb_dst_set(buff, dst); idev = in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 834cea69fb5..d5ed92b1434 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -12,7 +12,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { @@ -28,9 +28,15 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) + xfrm_decode_session(skb, &fl, AF_INET6) == 0) { + struct dst_entry *dst2 = skb_dst(skb); + + if (xfrm_lookup(net, &dst2, &fl, skb->sk, 0)) { + skb_dst_set(skb, NULL); return -1; + } + skb_dst_set(skb, dst2); + } #endif if (dst->error) { @@ -41,9 +47,9 @@ int ip6_route_me_harder(struct sk_buff *skb) } /* Drop old route. */ - dst_release(skb->dst); + skb_dst_drop(skb); - skb->dst = dst; + skb_dst_set(skb, dst); return 0; } EXPORT_SYMBOL(ip6_route_me_harder); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 5a2d0a41694..5a7f00cd15c 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -112,7 +112,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) return; } - nskb->dst = dst; + skb_dst_set(nskb, dst); skb_reserve(nskb, hh_len + dst->header_len); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e99307fba0b..36a090d87a3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -625,7 +625,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); skb_put(skb, length); skb_reset_network_header(skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e9ac7a12f59..54a387d31e1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -267,7 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; struct net_device *dev; int offset, end; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -277,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - @@ -310,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); @@ -434,7 +434,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; err: - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; @@ -576,9 +576,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len==0) @@ -595,17 +595,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb) /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); IP6_INC_STATS_BH(net, - ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); return 1; } if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) - ip6_evictor(net, ip6_dst_idev(skb->dst)); + ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_dst_idev(skb->dst))) != NULL) { + ip6_dst_idev(skb_dst(skb)))) != NULL) { int ret; spin_lock(&fq->q.lock); @@ -617,12 +617,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; fail_hdr: - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 032a5ec391c..658293ea05b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -800,7 +800,7 @@ void ip6_route_input(struct sk_buff *skb) if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); + skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, @@ -911,7 +911,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); - rt = (struct rt6_info *) skb->dst; + rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { dst_set_expires(&rt->u.dst, 0); @@ -1868,7 +1868,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); @@ -1895,7 +1895,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) static int ip6_pkt_discard_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } @@ -1908,7 +1908,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) static int ip6_pkt_prohibit_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } @@ -2366,7 +2366,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->u.dst); err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b3a59bd40f0..68e52308e55 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -575,8 +575,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_packets++; tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); @@ -638,8 +637,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; - if (skb->dst) - neigh = skb->dst->neighbour; + if (skb_dst(skb)) + neigh = skb_dst(skb)->neighbour; if (neigh == NULL) { if (net_ratelimit()) @@ -663,8 +662,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { struct neighbour *neigh = NULL; - if (skb->dst) - neigh = skb->dst->neighbour; + if (skb_dst(skb)) + neigh = skb_dst(skb)->neighbour; if (neigh == NULL) { if (net_ratelimit()) @@ -714,7 +713,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tiph->frag_off) mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); else - mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (mtu < 68) { stats->collisions++; @@ -723,8 +722,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (tunnel->parms.iph.daddr && skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (tunnel->parms.iph.daddr && skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); @@ -768,8 +767,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ea37741062a..53b6a4192b1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -981,9 +981,10 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); + struct dst_entry *dst; __be32 *topt; if (ts) @@ -1052,8 +1053,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * Underlying function will use this to retrieve the network * namespace */ - if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { + if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { + if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { + skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8905712cfbb..fc333d85472 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -177,10 +177,9 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, if (unlikely(sk = skb_steal_sock(skb))) return sk; - else - return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - udptable); + return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + udptable); } /* diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index e20529b4c82..3927832227b 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -31,7 +31,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) */ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *top_iph; int dsfield; @@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family); + top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5ee5a031bc9..c4f4eef032a 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -30,7 +30,7 @@ EXPORT_SYMBOL(xfrm6_find_1stfragopt); static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); mtu = dst_mtu(dst); if (mtu < IPV6_MIN_MTU) @@ -90,6 +90,6 @@ static int xfrm6_output_finish(struct sk_buff *skb) int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, xfrm6_output_finish); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 425ab144f15..5874657af7f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -260,8 +260,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -324,8 +324,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -388,8 +388,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -465,8 +465,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -553,8 +553,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n"); goto tx_error; } - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -596,8 +596,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. @@ -665,8 +665,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); goto tx_error; } - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); @@ -702,8 +702,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. @@ -775,8 +775,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -828,8 +828,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -900,8 +900,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop the old route when skb is not shared */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -975,8 +975,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop the old route when skb is not shared */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 117b80112fc..a6d6ec320fb 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -176,7 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { - struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev); + struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 4f3b1f80879..eda64c1cb1e 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -73,11 +73,11 @@ tcpmss_mangle_packet(struct sk_buff *skb, } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu(skb->dst) <= minlen) { + if (dst_mtu(skb_dst(skb)) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu(skb->dst)); + dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { @@ -86,7 +86,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, "invalid path-MTU (%u)\n", in_mtu); return -1; } - newmss = min(dst_mtu(skb->dst), in_mtu) - minlen; + newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; } else newmss = info->mss; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 328bd20ddd2..4cbfebda8fa 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -86,7 +86,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - const struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb_dst(skb); int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 67419287bc7..484d1689bfd 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -25,7 +25,7 @@ static bool realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_realm_info *info = par->matchinfo; - const struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb_dst(skb); return (info->id == (dst->tclassid & info->mask)) ^ info->invert; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6da9f38ef5c..4f76e5552d8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -372,8 +372,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct goto oom; /* drop any routing info */ - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); /* drop conntrack reference */ nf_reset(skb); @@ -621,8 +620,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet skb_set_owner_r(skb, sk); skb->dev = NULL; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); /* drop conntrack reference */ nf_reset(skb); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 0ef4e3065bc..9402a7fd378 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -84,7 +84,7 @@ static u32 flow_get_dst(const struct sk_buff *skb) case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); default: - return addr_fold(skb->dst) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } } @@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) break; } default: - res = addr_fold(skb->dst) ^ (__force u16)skb->protocol; + res = addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } return res; @@ -251,8 +251,8 @@ fallback: static u32 flow_get_rtclassid(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ROUTE - if (skb->dst) - return skb->dst->tclassid; + if (skb_dst(skb)) + return skb_dst(skb)->tclassid; #endif return 0; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index bdf1f4172ee..dd872d5383e 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -137,7 +137,7 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, u32 id, h; int iif, dont_cache = 0; - if ((dst = skb->dst) == NULL) + if ((dst = skb_dst(skb)) == NULL) goto failure; id = dst->tclassid; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index b6b588bed4e..266151ae85a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -246,11 +246,11 @@ META_COLLECTOR(int_tcindex) META_COLLECTOR(int_rtclassid) { - if (unlikely(skb->dst == NULL)) + if (unlikely(skb_dst(skb) == NULL)) *err = -1; else #ifdef CONFIG_NET_CLS_ROUTE - dst->value = skb->dst->tclassid; + dst->value = skb_dst(skb)->tclassid; #else dst->value = 0; #endif diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 33133d27b53..8706920a6d4 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -149,7 +149,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (unsigned long)skb->dst ^ skb->protocol; + h = (unsigned long)skb_dst(skb) ^ skb->protocol; h2 = (unsigned long)skb->sk; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index a886496bdc3..cb1cb1e76b9 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -222,7 +222,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = skb->dst->neighbour; + struct neighbour *mn = skb_dst(skb)->neighbour; struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,8 +262,8 @@ static inline int teql_resolve(struct sk_buff *skb, return -ENODEV; if (dev->header_ops == NULL || - skb->dst == NULL || - skb->dst->neighbour == NULL) + skb_dst(skb) == NULL || + skb_dst(skb)->neighbour == NULL) return 0; return __teql_resolve(skb, skb_res, dev); } diff --git a/net/sctp/output.c b/net/sctp/output.c index f0c91df59d4..b7641144451 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -405,10 +405,10 @@ int sctp_packet_transmit(struct sctp_packet *packet) sctp_assoc_sync_pmtu(asoc); } } - nskb->dst = dst_clone(tp->dst); - if (!nskb->dst) + dst = dst_clone(tp->dst); + skb_dst_set(nskb, dst); + if (dst) goto no_route; - dst = nskb->dst; /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e1859614601..6c2d6158655 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -918,7 +918,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); /* Something worked... */ - dst_confirm(skb->dst); + dst_confirm(skb_dst(skb)); xprt_adjust_cwnd(task, copied); xprt_update_rtt(task); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b4a13178fb4..e0009c17d80 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -251,8 +251,7 @@ resume: nf_reset(skb); if (decaps) { - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); netif_rx(skb); return 0; } else { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index c235597ba8d..b9fe13138c0 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -22,7 +22,7 @@ static int xfrm_output2(struct sk_buff *skb); static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); @@ -39,7 +39,7 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_output_one(struct sk_buff *skb, int err) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; struct net *net = xs_net(x); @@ -94,12 +94,13 @@ resume: goto error_nolock; } - if (!(skb->dst = dst_pop(dst))) { + dst = dst_pop(dst); + if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - dst = skb->dst; + skb_dst_set(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); @@ -119,16 +120,16 @@ int xfrm_output_resume(struct sk_buff *skb, int err) while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset(skb); - err = skb->dst->ops->local_out(skb); + err = skb_dst(skb)->ops->local_out(skb); if (unlikely(err != 1)) goto out; - if (!skb->dst->xfrm) + if (!skb_dst(skb)->xfrm) return dst_output(skb); - err = nf_hook(skb->dst->ops->family, + err = nf_hook(skb_dst(skb)->ops->family, NF_INET_POST_ROUTING, skb, - NULL, skb->dst->dev, xfrm_output2); + NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; } @@ -179,7 +180,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int xfrm_output(struct sk_buff *skb) { - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); int err; if (skb_is_gso(skb)) @@ -202,7 +203,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_mode *inner_mode; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb->dst->ops->family)); + xfrm_af2proto(skb_dst(skb)->ops->family)); else inner_mode = x->inner_mode; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9c068ab3a83..cb81ca35b0d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2027,6 +2027,8 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); struct flowi fl; + struct dst_entry *dst; + int res; if (xfrm_decode_session(skb, &fl, family) < 0) { /* XXX: we should have something like FWDHDRERROR here. */ @@ -2034,7 +2036,11 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } - return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0; + dst = skb_dst(skb); + + res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; + skb_dst_set(skb, dst); + return res; } EXPORT_SYMBOL(__xfrm_route_forward); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fcad7c33ea..4bfc6153ad4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4503,7 +4503,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * when the packet is on it's final way out. * NOTE: there appear to be some IPv6 multicast cases where skb->dst * is NULL, in this case go ahead and apply access control. */ - if (skb->dst != NULL && skb->dst->xfrm != NULL) + if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL) return NF_ACCEPT; #endif secmark_active = selinux_secmark_enabled(); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c0eb72013d6..72b18452e1a 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -447,7 +447,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, struct dst_entry *dst; int rc = 0; - dst = skb->dst; + dst = skb_dst(skb); if (dst) { struct dst_entry *dst_test; -- cgit v1.2.3-70-g09d2 From e5b9215ef9a274eb9fb65f6aa4602ad82d10a6cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jun 2009 05:20:21 +0000 Subject: net: skb cleanup Can remove anonymous union now it has one field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9ef6eb20247..7305da92be8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -322,9 +322,7 @@ struct sk_buff { ktime_t tstamp; struct net_device *dev; - union { - unsigned long _skb_dst; - }; + unsigned long _skb_dst; #ifdef CONFIG_XFRM struct sec_path *sp; #endif -- cgit v1.2.3-70-g09d2 From a84db7949eab7a42e715192f62c55c554e195e54 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 7 Apr 2009 15:41:39 +0800 Subject: sctp: fix error cause codes of ADD-IP extension RFC5061 had changed the error cause codes for Dynamic Address Reconfiguration As the following: Cause Code Value Cause Code --------- ---------------- 0x00A0 Request to Delete Last Remaining IP Address 0x00A1 Operation Refused Due to Resource Shortage 0x00A2 Request to Delete Source IP Address 0x00A3 Association Aborted Due to Illegal ASCONF-ACK 0x00A4 Request Refused - No Authorization This patch fix the error cause codes. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/linux/sctp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c2731bfe04d..b464b9d3d24 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -487,17 +487,17 @@ typedef enum { * * Value Cause Code * --------- ---------------- - * 0x0100 Request to Delete Last Remaining IP Address. - * 0x0101 Operation Refused Due to Resource Shortage. - * 0x0102 Request to Delete Source IP Address. - * 0x0103 Association Aborted due to illegal ASCONF-ACK - * 0x0104 Request refused - no authorization. + * 0x00A0 Request to Delete Last Remaining IP Address. + * 0x00A1 Operation Refused Due to Resource Shortage. + * 0x00A2 Request to Delete Source IP Address. + * 0x00A3 Association Aborted due to illegal ASCONF-ACK + * 0x00A4 Request refused - no authorization. */ - SCTP_ERROR_DEL_LAST_IP = cpu_to_be16(0x0100), - SCTP_ERROR_RSRC_LOW = cpu_to_be16(0x0101), - SCTP_ERROR_DEL_SRC_IP = cpu_to_be16(0x0102), - SCTP_ERROR_ASCONF_ACK = cpu_to_be16(0x0103), - SCTP_ERROR_REQ_REFUSED = cpu_to_be16(0x0104), + SCTP_ERROR_DEL_LAST_IP = cpu_to_be16(0x00A0), + SCTP_ERROR_RSRC_LOW = cpu_to_be16(0x00A1), + SCTP_ERROR_DEL_SRC_IP = cpu_to_be16(0x00A2), + SCTP_ERROR_ASCONF_ACK = cpu_to_be16(0x00A3), + SCTP_ERROR_REQ_REFUSED = cpu_to_be16(0x00A4), /* AUTH Section 4. New Error Cause * -- cgit v1.2.3-70-g09d2 From 9919b455fc00c995ef8141848bdc0709ce50bf36 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 May 2009 21:52:51 +0800 Subject: sctp: fix to choose alternate destination when retransmit ASCONF chunk RFC 5061 Section 5.1 ASCONF Chunk Procedures said: B4) Re-transmit the ASCONF Chunk last sent and if possible choose an alternate destination address (please refer to [RFC4960], Section 6.4.1). An endpoint MUST NOT add new parameters to this chunk; it MUST be the same (including its Sequence Number) as the last ASCONF sent. An endpoint MAY, however, bundle an additional ASCONF with new ASCONF parameters with the next Sequence Number. For details, see Section 5.5. This patch fix to choose an alternate destination address when re-transmit the ASCONF chunk, with some dup codes cleanup. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 6 ++---- net/sctp/associola.c | 42 +++++++----------------------------------- net/sctp/sm_sideeffect.c | 8 +++++--- 3 files changed, 14 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 23f08fe1d50..edfcacf3250 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1939,10 +1939,8 @@ void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); -struct sctp_transport *sctp_assoc_choose_init_transport( - struct sctp_association *); -struct sctp_transport *sctp_assoc_choose_shutdown_transport( - struct sctp_association *); +struct sctp_transport *sctp_assoc_choose_alter_transport( + struct sctp_association *, struct sctp_transport *); void sctp_assoc_update_retran_path(struct sctp_association *); struct sctp_transport *sctp_assoc_lookup_paddr(const struct sctp_association *, const union sctp_addr *); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8d3aef9d061..39f5166ae7a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1284,49 +1284,21 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) ntohs(t->ipaddr.v4.sin_port)); } -/* Choose the transport for sending a INIT packet. */ -struct sctp_transport *sctp_assoc_choose_init_transport( - struct sctp_association *asoc) +/* Choose the transport for sending retransmit packet. */ +struct sctp_transport *sctp_assoc_choose_alter_transport( + struct sctp_association *asoc, struct sctp_transport *last_sent_to) { - struct sctp_transport *t; - - /* Use the retran path. If the last INIT was sent over the + /* If this is the first time packet is sent, use the active path, + * else use the retran path. If the last packet was sent over the * retran path, update the retran path and use it. */ - if (!asoc->init_last_sent_to) { - t = asoc->peer.active_path; - } else { - if (asoc->init_last_sent_to == asoc->peer.retran_path) - sctp_assoc_update_retran_path(asoc); - t = asoc->peer.retran_path; - } - - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" - " %p addr: ", - " port: %d\n", - asoc, - (&t->ipaddr), - ntohs(t->ipaddr.v4.sin_port)); - - return t; -} - -/* Choose the transport for sending a SHUTDOWN packet. */ -struct sctp_transport *sctp_assoc_choose_shutdown_transport( - struct sctp_association *asoc) -{ - /* If this is the first time SHUTDOWN is sent, use the active path, - * else use the retran path. If the last SHUTDOWN was sent over the - * retran path, update the retran path and use it. - */ - if (!asoc->shutdown_last_sent_to) + if (!last_sent_to) return asoc->peer.active_path; else { - if (asoc->shutdown_last_sent_to == asoc->peer.retran_path) + if (last_sent_to == asoc->peer.retran_path) sctp_assoc_update_retran_path(asoc); return asoc->peer.retran_path; } - } /* Update the association's pmtu and frag_point by going through all the diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e2020eb2c8c..86426aac160 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -686,7 +686,8 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = sctp_assoc_choose_shutdown_transport(asoc); + t = sctp_assoc_choose_alter_transport(asoc, + asoc->shutdown_last_sent_to); asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; chunk->transport = t; @@ -777,7 +778,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = asoc->peer.active_path; + t = sctp_assoc_choose_alter_transport(asoc, chunk->transport); asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = t->rto; chunk->transport = t; } @@ -1379,7 +1380,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_INIT_CHOOSE_TRANSPORT: chunk = cmd->obj.ptr; - t = sctp_assoc_choose_init_transport(asoc); + t = sctp_assoc_choose_alter_transport(asoc, + asoc->init_last_sent_to); asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; -- cgit v1.2.3-70-g09d2 From c6ba68a26645dbc5029a9faa5687ebe6fcfc53e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 1 Jun 2009 12:41:15 -0400 Subject: sctp: support non-blocking version of the new sctp_connectx() API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior implementation of the new sctp_connectx() call that returns an association ID did not work correctly on non-blocking socket. This is because we could not return both a EINPROGRESS error and an association id. This is a new implementation that supports this. Originally from Ivan Skytte Jørgensen --- include/net/sctp/user.h | 2 ++ net/sctp/associola.c | 4 ++++ net/sctp/socket.c | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index b259fc5798f..1580c04f68b 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -147,6 +147,8 @@ enum sctp_optname { #define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ #define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX + SCTP_SOCKOPT_CONNECTX3, /* CONNECTX requests. (new implementation) */ +#define SCTP_SOCKOPT_CONNECTX3 SCTP_SOCKOPT_CONNECTX3 }; /* diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 39f5166ae7a..525864bf4f0 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1470,6 +1470,10 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { int assoc_id; int error = 0; + + /* If the id is already assigned, keep it. */ + if (asoc->assoc_id) + return error; retry: if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) return -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fb3a8c9792..7c3dfd2d948 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1100,6 +1100,15 @@ static int __sctp_connect(struct sock* sk, goto out_free; } + /* In case the user of sctp_connectx() wants an association + * id back, assign one now. + */ + if (assoc_id) { + err = sctp_assoc_set_id(asoc, GFP_KERNEL); + if (err < 0) + goto out_free; + } + err = sctp_primitive_ASSOCIATE(asoc, NULL); if (err < 0) { goto out_free; @@ -1120,7 +1129,7 @@ static int __sctp_connect(struct sock* sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); err = sctp_wait_for_connect(asoc, &timeo); - if (!err && assoc_id) + if ((err == 0 || err == -EINPROGRESS) && assoc_id) *assoc_id = asoc->assoc_id; /* Don't free association on exit. */ @@ -1264,6 +1273,34 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, return assoc_id; } +/* + * New (hopefully final) interface for the API. The option buffer is used + * both for the returned association id and the addresses. + */ +SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) +{ + sctp_assoc_t assoc_id = 0; + int err = 0; + + if (len < sizeof(assoc_id)) + return -EINVAL; + + err = __sctp_setsockopt_connectx(sk, + (struct sockaddr __user *)(optval + sizeof(assoc_id)), + len - sizeof(assoc_id), &assoc_id); + + if (err == 0 || err == -EINPROGRESS) { + if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) + return -EFAULT; + if (put_user(sizeof(assoc_id), optlen)) + return -EFAULT; + } + + return err; +} + /* API 3.1.4 close() - UDP Style Syntax * Applications use close() to perform graceful shutdown (as described in * Section 10.1 of [SCTP]) on ALL the associations currently represented @@ -5578,6 +5615,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_local_addrs(sk, len, optval, optlen); break; + case SCTP_SOCKOPT_CONNECTX3: + retval = sctp_getsockopt_connectx3(sk, len, optval, optlen); + break; case SCTP_DEFAULT_SEND_PARAM: retval = sctp_getsockopt_default_send_param(sk, len, optval, optlen); -- cgit v1.2.3-70-g09d2 From 56d8bd3f0b98972312cad683947ec90b21011199 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Jun 2009 14:52:03 +0100 Subject: tracing: fix multiple use of __print_flags and __print_symbolic Here is an updated patch to include the extra call to trace_seq_init() as requested. This is vs. the latest -tip tree and fixes the use of multiple __print_flags and __print_symbolic in a single tracer. Also tested to ensure its working now: mount.gfs2-2534 [000] 235.850587: gfs2_glock_queue: 8.7 glock 1:2 dequeue PR mount.gfs2-2534 [000] 235.850591: gfs2_demote_rq: 8.7 glock 1:0 demote EX to NL flags:DI mount.gfs2-2534 [000] 235.850591: gfs2_glock_queue: 8.7 glock 1:0 dequeue EX glock_workqueue-2529 [000] 235.850666: gfs2_glock_state_change: 8.7 glock 1:0 state EX => NL tgt:NL dmt:NL flags:lDpI glock_workqueue-2529 [000] 235.850672: gfs2_glock_put: 8.7 glock 1:0 state NL => IV flags:I Signed-off-by: Steven Whitehouse LKML-Reference: <1244037123.29604.603.camel@localhost.localdomain> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 ++ kernel/trace/trace_output.c | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b5478dab579..40ede4db4d8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -104,6 +104,7 @@ * field = (typeof(field))entry; * * p = get_cpu_var(ftrace_event_seq); + * trace_seq_init(p); * ret = trace_seq_printf(s, "\n"); * put_cpu(); * if (!ret) @@ -167,6 +168,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ field = (typeof(field))entry; \ \ p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ ret = trace_seq_printf(s, #call ": " print); \ put_cpu(); \ if (!ret) \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8dadbbbd2d5..8afeea412e7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -223,10 +223,9 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, { unsigned long mask; const char *str; + const char *ret = p->buffer + p->len; int i; - trace_seq_init(p); - for (i = 0; flag_array[i].name && flags; i++) { mask = flag_array[i].mask; @@ -249,7 +248,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, trace_seq_putc(p, 0); - return p->buffer; + return ret; } EXPORT_SYMBOL(ftrace_print_flags_seq); @@ -258,8 +257,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array) { int i; - - trace_seq_init(p); + const char *ret = p->buffer + p->len; for (i = 0; symbol_array[i].name; i++) { @@ -275,7 +273,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, trace_seq_putc(p, 0); - return p->buffer; + return ret; } EXPORT_SYMBOL(ftrace_print_symbols_seq); -- cgit v1.2.3-70-g09d2 From e535c7566e1318ccfa015e297f0309994f7bc078 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 23 May 2009 11:18:45 +0200 Subject: mac80211: deprecate conf.beacon_int properly Ivo has updated the driver to no longer use the change flag, so we can remove that, but rt2x00 and ath5k still use the actual value so let's mark it as deprecated too. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 14 ++------------ net/mac80211/main.c | 12 ++---------- 2 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d72346ff324..0270aa6e08f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -527,7 +527,6 @@ enum ieee80211_conf_flags { * enum ieee80211_conf_changed - denotes which configuration changed * * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed - * @_IEEE80211_CONF_CHANGE_BEACON_INTERVAL: DEPRECATED * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed @@ -538,7 +537,6 @@ enum ieee80211_conf_flags { */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), - _IEEE80211_CONF_CHANGE_BEACON_INTERVAL = BIT(1), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -548,14 +546,6 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; -static inline __deprecated enum ieee80211_conf_changed -__IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void) -{ - return _IEEE80211_CONF_CHANGE_BEACON_INTERVAL; -} -#define IEEE80211_CONF_CHANGE_BEACON_INTERVAL \ - __IEEE80211_CONF_CHANGE_BEACON_INTERVAL() - /** * struct ieee80211_conf - configuration of the device * @@ -564,7 +554,7 @@ __IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void) * @flags: configuration flags defined above * * @radio_enabled: when zero, driver is required to switch off the radio. - * @beacon_int: beacon interval (TODO make interface config) + * @beacon_int: DEPRECATED, DO NOT USE * * @listen_interval: listen interval in units of beacon interval * @max_sleep_period: the maximum number of beacon intervals to sleep for @@ -589,7 +579,7 @@ __IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void) * number of transmissions not the number of retries */ struct ieee80211_conf { - int beacon_int; + int __deprecated beacon_int; u32 flags; int power_level, dynamic_ps_timeout; int max_sleep_period; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e37770ced53..2683df91807 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -289,16 +289,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, drv_bss_info_changed(local, &sdata->vif, &sdata->vif.bss_conf, changed); - /* - * DEPRECATED - * - * ~changed is just there to not do this at resume time - */ - if (changed & BSS_CHANGED_BEACON_INT && ~changed) { - local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int; - ieee80211_hw_config(local, - _IEEE80211_CONF_CHANGE_BEACON_INTERVAL); - } + /* DEPRECATED */ + local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int; } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) -- cgit v1.2.3-70-g09d2 From 8fc0fee09245fe740d81aded36f4af6db1054015 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 May 2009 16:57:19 +0200 Subject: cfg80211: use key size constants Instead of hardcoding the key length for validation, use the constants Zhu Yi recently added and add one for AES_CMAC too. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 1 + net/wireless/util.c | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 34de8b21f6d..a9173d5434d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1092,6 +1092,7 @@ enum ieee80211_key_len { WLAN_KEY_LEN_WEP104 = 13, WLAN_KEY_LEN_CCMP = 16, WLAN_KEY_LEN_TKIP = 32, + WLAN_KEY_LEN_AES_CMAC = 16, }; /* diff --git a/net/wireless/util.c b/net/wireless/util.c index d072bff463a..5be9ed7ae1e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -157,26 +157,25 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; - /* TODO: add definitions for the lengths to linux/ieee80211.h */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: - if (params->key_len != 5) + if (params->key_len != WLAN_KEY_LEN_WEP40) return -EINVAL; break; case WLAN_CIPHER_SUITE_TKIP: - if (params->key_len != 32) + if (params->key_len != WLAN_KEY_LEN_TKIP) return -EINVAL; break; case WLAN_CIPHER_SUITE_CCMP: - if (params->key_len != 16) + if (params->key_len != WLAN_KEY_LEN_CCMP) return -EINVAL; break; case WLAN_CIPHER_SUITE_WEP104: - if (params->key_len != 13) + if (params->key_len != WLAN_KEY_LEN_WEP104) return -EINVAL; break; case WLAN_CIPHER_SUITE_AES_CMAC: - if (params->key_len != 16) + if (params->key_len != WLAN_KEY_LEN_AES_CMAC) return -EINVAL; break; default: -- cgit v1.2.3-70-g09d2 From 3b8bcfd5d31ea0fec58681d035544ace707d2536 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 30 May 2009 01:39:53 +0200 Subject: net: introduce pre-up netdev notifier NETDEV_UP is called after the device is set UP, but sometimes it is useful to be able to veto the device UP. Introduce a new NETDEV_PRE_UP notifier that can be used for exactly this. The first use case will be cfg80211 denying interfaces to be set UP if the device is known to be rfkill'ed. Signed-off-by: Johannes Berg Acked-by: David S. Miller Signed-off-by: John W. Linville --- include/linux/notifier.h | 1 + net/core/dev.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b86fa2ffca0..81bc252dc8a 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -198,6 +198,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_CHANGENAME 0x000A #define NETDEV_FEAT_CHANGE 0x000B #define NETDEV_BONDING_FAILOVER 0x000C +#define NETDEV_PRE_UP 0x000D #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN diff --git a/net/core/dev.c b/net/core/dev.c index 34b49a6a22f..1f38401fc02 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1048,7 +1048,7 @@ void dev_load(struct net *net, const char *name) int dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; - int ret = 0; + int ret; ASSERT_RTNL(); @@ -1065,6 +1065,11 @@ int dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); + ret = notifier_to_errno(ret); + if (ret) + return ret; + /* * Call device private open method */ -- cgit v1.2.3-70-g09d2 From 19d337dff95cbf76edd3ad95c0cee2732c3e1ec5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2009 13:01:37 +0200 Subject: rfkill: rewrite This patch completely rewrites the rfkill core to address the following deficiencies: * all rfkill drivers need to implement polling where necessary rather than having one central implementation * updating the rfkill state cannot be done from arbitrary contexts, forcing drivers to use schedule_work and requiring lots of code * rfkill drivers need to keep track of soft/hard blocked internally -- the core should do this * the rfkill API has many unexpected quirks, for example being asymmetric wrt. alloc/free and register/unregister * rfkill can call back into a driver from within a function the driver called -- this is prone to deadlocks and generally should be avoided * rfkill-input pointlessly is a separate module * drivers need to #ifdef rfkill functions (unless they want to depend on or select RFKILL) -- rfkill should provide inlines that do nothing if it isn't compiled in * the rfkill structure is not opaque -- drivers need to initialise it correctly (lots of sanity checking code required) -- instead force drivers to pass the right variables to rfkill_alloc() * the documentation is hard to read because it always assumes the reader is completely clueless and contains way TOO MANY CAPS * the rfkill code needlessly uses a lot of locks and atomic operations in locked sections * fix LED trigger to actually change the LED when the radio state changes -- this wasn't done before Tested-by: Alan Jenkins Signed-off-by: Henrique de Moraes Holschuh [thinkpad] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/rfkill.txt | 597 +++---------------- MAINTAINERS | 6 +- arch/arm/mach-pxa/tosa-bt.c | 30 +- arch/arm/mach-pxa/tosa.c | 1 - drivers/net/usb/hso.c | 42 +- drivers/net/wireless/ath/ath9k/ath9k.h | 7 +- drivers/net/wireless/ath/ath9k/main.c | 115 +--- drivers/net/wireless/ath/ath9k/pci.c | 15 - drivers/net/wireless/b43/Kconfig | 2 +- drivers/net/wireless/b43/leds.c | 2 +- drivers/net/wireless/b43/main.c | 4 +- drivers/net/wireless/b43/phy_a.c | 4 +- drivers/net/wireless/b43/phy_common.c | 17 +- drivers/net/wireless/b43/phy_common.h | 4 +- drivers/net/wireless/b43/phy_g.c | 4 +- drivers/net/wireless/b43/phy_lp.c | 2 +- drivers/net/wireless/b43/phy_n.c | 2 +- drivers/net/wireless/b43/rfkill.c | 123 ++-- drivers/net/wireless/b43/rfkill.h | 5 +- drivers/net/wireless/b43legacy/Kconfig | 2 +- drivers/net/wireless/b43legacy/leds.c | 3 +- drivers/net/wireless/b43legacy/rfkill.c | 123 ++-- drivers/net/wireless/b43legacy/rfkill.h | 6 +- drivers/net/wireless/iwlwifi/Kconfig | 5 +- drivers/net/wireless/iwlwifi/iwl-rfkill.c | 69 +-- drivers/net/wireless/iwmc3200wifi/rfkill.c | 39 +- drivers/platform/x86/Kconfig | 14 +- drivers/platform/x86/acer-wmi.c | 50 +- drivers/platform/x86/dell-laptop.c | 101 ++-- drivers/platform/x86/eeepc-laptop.c | 99 +--- drivers/platform/x86/hp-wmi.c | 103 ++-- drivers/platform/x86/sony-laptop.c | 191 +++--- drivers/platform/x86/thinkpad_acpi.c | 873 ++++++++++++++-------------- drivers/platform/x86/toshiba_acpi.c | 159 ++--- include/linux/Kbuild | 1 + include/linux/rfkill.h | 325 ++++++++--- include/net/wimax.h | 8 +- net/rfkill/Kconfig | 21 +- net/rfkill/Makefile | 5 +- net/rfkill/core.c | 896 +++++++++++++++++++++++++++++ net/rfkill/input.c | 342 +++++++++++ net/rfkill/rfkill-input.c | 390 ------------- net/rfkill/rfkill-input.h | 21 - net/rfkill/rfkill.c | 855 --------------------------- net/rfkill/rfkill.h | 27 + net/wimax/Kconfig | 14 - net/wimax/op-rfkill.c | 123 +--- 47 files changed, 2555 insertions(+), 3292 deletions(-) create mode 100644 net/rfkill/core.c create mode 100644 net/rfkill/input.c delete mode 100644 net/rfkill/rfkill-input.c delete mode 100644 net/rfkill/rfkill-input.h delete mode 100644 net/rfkill/rfkill.c create mode 100644 net/rfkill/rfkill.h (limited to 'include') diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 40c3a3f1081..de941e309d4 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt @@ -1,571 +1,130 @@ -rfkill - RF switch subsystem support -==================================== +rfkill - RF kill switch support +=============================== -1 Introduction -2 Implementation details -3 Kernel driver guidelines -3.1 wireless device drivers -3.2 platform/switch drivers -3.3 input device drivers -4 Kernel API -5 Userspace support +1. Introduction +2. Implementation details +3. Kernel driver guidelines +4. Kernel API +5. Userspace support -1. Introduction: +1. Introduction -The rfkill switch subsystem exists to add a generic interface to circuitry that -can enable or disable the signal output of a wireless *transmitter* of any -type. By far, the most common use is to disable radio-frequency transmitters. +The rfkill subsystem provides a generic interface to disabling any radio +transmitter in the system. When a transmitter is blocked, it shall not +radiate any power. -Note that disabling the signal output means that the the transmitter is to be -made to not emit any energy when "blocked". rfkill is not about blocking data -transmissions, it is about blocking energy emission. +The subsystem also provides the ability to react on button presses and +disable all transmitters of a certain type (or all). This is intended for +situations where transmitters need to be turned off, for example on +aircraft. -The rfkill subsystem offers support for keys and switches often found on -laptops to enable wireless devices like WiFi and Bluetooth, so that these keys -and switches actually perform an action in all wireless devices of a given type -attached to the system. -The buttons to enable and disable the wireless transmitters are important in -situations where the user is for example using his laptop on a location where -radio-frequency transmitters _must_ be disabled (e.g. airplanes). -Because of this requirement, userspace support for the keys should not be made -mandatory. Because userspace might want to perform some additional smarter -tasks when the key is pressed, rfkill provides userspace the possibility to -take over the task to handle the key events. - -=============================================================================== -2: Implementation details +2. Implementation details The rfkill subsystem is composed of various components: the rfkill class, the rfkill-input module (an input layer handler), and some specific input layer events. -The rfkill class provides kernel drivers with an interface that allows them to -know when they should enable or disable a wireless network device transmitter. -This is enabled by the CONFIG_RFKILL Kconfig option. - -The rfkill class support makes sure userspace will be notified of all state -changes on rfkill devices through uevents. It provides a notification chain -for interested parties in the kernel to also get notified of rfkill state -changes in other drivers. It creates several sysfs entries which can be used -by userspace. See section "Userspace support". - -The rfkill-input module provides the kernel with the ability to implement a -basic response when the user presses a key or button (or toggles a switch) -related to rfkill functionality. It is an in-kernel implementation of default -policy of reacting to rfkill-related input events and neither mandatory nor -required for wireless drivers to operate. It is enabled by the -CONFIG_RFKILL_INPUT Kconfig option. - -rfkill-input is a rfkill-related events input layer handler. This handler will -listen to all rfkill key events and will change the rfkill state of the -wireless devices accordingly. With this option enabled userspace could either -do nothing or simply perform monitoring tasks. - -The rfkill-input module also provides EPO (emergency power-off) functionality -for all wireless transmitters. This function cannot be overridden, and it is -always active. rfkill EPO is related to *_RFKILL_ALL input layer events. - - -Important terms for the rfkill subsystem: - -In order to avoid confusion, we avoid the term "switch" in rfkill when it is -referring to an electronic control circuit that enables or disables a -transmitter. We reserve it for the physical device a human manipulates -(which is an input device, by the way): - -rfkill switch: - - A physical device a human manipulates. Its state can be perceived by - the kernel either directly (through a GPIO pin, ACPI GPE) or by its - effect on a rfkill line of a wireless device. - -rfkill controller: - - A hardware circuit that controls the state of a rfkill line, which a - kernel driver can interact with *to modify* that state (i.e. it has - either write-only or read/write access). - -rfkill line: - - An input channel (hardware or software) of a wireless device, which - causes a wireless transmitter to stop emitting energy (BLOCK) when it - is active. Point of view is extremely important here: rfkill lines are - always seen from the PoV of a wireless device (and its driver). - -soft rfkill line/software rfkill line: - - A rfkill line the wireless device driver can directly change the state - of. Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED. - -hard rfkill line/hardware rfkill line: - - A rfkill line that works fully in hardware or firmware, and that cannot - be overridden by the kernel driver. The hardware device or the - firmware just exports its status to the driver, but it is read-only. - Related to rfkill_state RFKILL_STATE_HARD_BLOCKED. - -The enum rfkill_state describes the rfkill state of a transmitter: - -When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state, -the wireless transmitter (radio TX circuit for example) is *enabled*. When the -it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the -wireless transmitter is to be *blocked* from operating. - -RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change -that state. RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio() -will not be able to change the state and will return with a suitable error if -attempts are made to set the state to RFKILL_STATE_UNBLOCKED. - -RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is -locked in the BLOCKED state by a hardwire rfkill line (typically an input pin -that, when active, forces the transmitter to be disabled) which the driver -CANNOT override. - -Full rfkill functionality requires two different subsystems to cooperate: the -input layer and the rfkill class. The input layer issues *commands* to the -entire system requesting that devices registered to the rfkill class change -state. The way this interaction happens is not complex, but it is not obvious -either: - -Kernel Input layer: - - * Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and - other such events when the user presses certain keys, buttons, or - toggles certain physical switches. - - THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE - KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT. It is - used to issue *commands* for the system to change behaviour, and these - commands may or may not be carried out by some kernel driver or - userspace application. It follows that doing user feedback based only - on input events is broken, as there is no guarantee that an input event - will be acted upon. - - Most wireless communication device drivers implementing rfkill - functionality MUST NOT generate these events, and have no reason to - register themselves with the input layer. Doing otherwise is a common - misconception. There is an API to propagate rfkill status change - information, and it is NOT the input layer. - -rfkill class: - - * Calls a hook in a driver to effectively change the wireless - transmitter state; - * Keeps track of the wireless transmitter state (with help from - the driver); - * Generates userspace notifications (uevents) and a call to a - notification chain (kernel) when there is a wireless transmitter - state change; - * Connects a wireless communications driver with the common rfkill - control system, which, for example, allows actions such as - "switch all bluetooth devices offline" to be carried out by - userspace or by rfkill-input. - - THE RFKILL CLASS NEVER ISSUES INPUT EVENTS. THE RFKILL CLASS DOES - NOT LISTEN TO INPUT EVENTS. NO DRIVER USING THE RFKILL CLASS SHALL - EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS. Doing otherwise is - a layering violation. - - Most wireless data communication drivers in the kernel have just to - implement the rfkill class API to work properly. Interfacing to the - input layer is not often required (and is very often a *bug*) on - wireless drivers. - - Platform drivers often have to attach to the input layer to *issue* - (but never to listen to) rfkill events for rfkill switches, and also to - the rfkill class to export a control interface for the platform rfkill - controllers to the rfkill subsystem. This does NOT mean the rfkill - switch is attached to a rfkill class (doing so is almost always wrong). - It just means the same kernel module is the driver for different - devices (rfkill switches and rfkill controllers). - - -Userspace input handlers (uevents) or kernel input handlers (rfkill-input): - - * Implements the policy of what should happen when one of the input - layer events related to rfkill operation is received. - * Uses the sysfs interface (userspace) or private rfkill API calls - to tell the devices registered with the rfkill class to change - their state (i.e. translates the input layer event into real - action). - - * rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0 - (power off all transmitters) in a special way: it ignores any - overrides and local state cache and forces all transmitters to the - RFKILL_STATE_SOFT_BLOCKED state (including those which are already - supposed to be BLOCKED). - * rfkill EPO will remain active until rfkill-input receives an - EV_SW SW_RFKILL_ALL 1 event. While the EPO is active, transmitters - are locked in the blocked state (rfkill will refuse to unblock them). - * rfkill-input implements different policies that the user can - select for handling EV_SW SW_RFKILL_ALL 1. It will unlock rfkill, - and either do nothing (leave transmitters blocked, but now unlocked), - restore the transmitters to their state before the EPO, or unblock - them all. - -Userspace uevent handler or kernel platform-specific drivers hooked to the -rfkill notifier chain: - - * Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents, - in order to know when a device that is registered with the rfkill - class changes state; - * Issues feedback notifications to the user; - * In the rare platforms where this is required, synthesizes an input - event to command all *OTHER* rfkill devices to also change their - statues when a specific rfkill device changes state. - - -=============================================================================== -3: Kernel driver guidelines - -Remember: point-of-view is everything for a driver that connects to the rfkill -subsystem. All the details below must be measured/perceived from the point of -view of the specific driver being modified. - -The first thing one needs to know is whether his driver should be talking to -the rfkill class or to the input layer. In rare cases (platform drivers), it -could happen that you need to do both, as platform drivers often handle a -variety of devices in the same driver. - -Do not mistake input devices for rfkill controllers. The only type of "rfkill -switch" device that is to be registered with the rfkill class are those -directly controlling the circuits that cause a wireless transmitter to stop -working (or the software equivalent of them), i.e. what we call a rfkill -controller. Every other kind of "rfkill switch" is just an input device and -MUST NOT be registered with the rfkill class. - -A driver should register a device with the rfkill class when ALL of the -following conditions are met (they define a rfkill controller): - -1. The device is/controls a data communications wireless transmitter; - -2. The kernel can interact with the hardware/firmware to CHANGE the wireless - transmitter state (block/unblock TX operation); - -3. The transmitter can be made to not emit any energy when "blocked": - rfkill is not about blocking data transmissions, it is about blocking - energy emission; - -A driver should register a device with the input subsystem to issue -rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX, -SW_RFKILL_ALL, etc) when ALL of the folowing conditions are met: - -1. It is directly related to some physical device the user interacts with, to - command the O.S./firmware/hardware to enable/disable a data communications - wireless transmitter. - - Examples of the physical device are: buttons, keys and switches the user - will press/touch/slide/switch to enable or disable the wireless - communication device. - -2. It is NOT slaved to another device, i.e. there is no other device that - issues rfkill-related input events in preference to this one. - - Please refer to the corner cases and examples section for more details. - -When in doubt, do not issue input events. For drivers that should generate -input events in some platforms, but not in others (e.g. b43), the best solution -is to NEVER generate input events in the first place. That work should be -deferred to a platform-specific kernel module (which will know when to generate -events through the rfkill notifier chain) or to userspace. This avoids the -usual maintenance problems with DMI whitelisting. - - -Corner cases and examples: -==================================== - -1. If the device is an input device that, because of hardware or firmware, -causes wireless transmitters to be blocked regardless of the kernel's will, it -is still just an input device, and NOT to be registered with the rfkill class. - -2. If the wireless transmitter switch control is read-only, it is an input -device and not to be registered with the rfkill class (and maybe not to be made -an input layer event source either, see below). - -3. If there is some other device driver *closer* to the actual hardware the -user interacted with (the button/switch/key) to issue an input event, THAT is -the device driver that should be issuing input events. - -E.g: - [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input] - (platform driver) (wireless card driver) - -The user is closer to the RFKILL slide switch plaform driver, so the driver -which must issue input events is the platform driver looking at the GPIO -hardware, and NEVER the wireless card driver (which is just a slave). It is -very likely that there are other leaves than just the WLAN card rf-kill input -(e.g. a bluetooth card, etc)... - -On the other hand, some embedded devices do this: - - [RFKILL slider switch] -- [WLAN card rf-kill input] - (wireless card driver) - -In this situation, the wireless card driver *could* register itself as an input -device and issue rf-kill related input events... but in order to AVOID the need -for DMI whitelisting, the wireless card driver does NOT do it. Userspace (HAL) -or a platform driver (that exists only on these embedded devices) will do the -dirty job of issuing the input events. - - -COMMON MISTAKES in kernel drivers, related to rfkill: -==================================== - -1. NEVER confuse input device keys and buttons with input device switches. - - 1a. Switches are always set or reset. They report the current state - (on position or off position). - - 1b. Keys and buttons are either in the pressed or not-pressed state, and - that's it. A "button" that latches down when you press it, and - unlatches when you press it again is in fact a switch as far as input - devices go. - -Add the SW_* events you need for switches, do NOT try to emulate a button using -KEY_* events just because there is no such SW_* event yet. Do NOT try to use, -for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead. - -2. Input device switches (sources of EV_SW events) DO store their current state -(so you *must* initialize it by issuing a gratuitous input layer event on -driver start-up and also when resuming from sleep), and that state CAN be -queried from userspace through IOCTLs. There is no sysfs interface for this, -but that doesn't mean you should break things trying to hook it to the rfkill -class to get a sysfs interface :-) - -3. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the -correct event for your switch/button. These events are emergency power-off -events when they are trying to turn the transmitters off. An example of an -input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill -switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch. -An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by -default, is any sort of hot key that is type-specific (e.g. the one for WLAN). - - -3.1 Guidelines for wireless device drivers ------------------------------------------- - -(in this text, rfkill->foo means the foo field of struct rfkill). - -1. Each independent transmitter in a wireless device (usually there is only one -transmitter per device) should have a SINGLE rfkill class attached to it. - -2. If the device does not have any sort of hardware assistance to allow the -driver to rfkill the device, the driver should emulate it by taking all actions -required to silence the transmitter. - -3. If it is impossible to silence the transmitter (i.e. it still emits energy, -even if it is just in brief pulses, when there is no data to transmit and there -is no hardware support to turn it off) do NOT lie to the users. Do not attach -it to a rfkill class. The rfkill subsystem does not deal with data -transmission, it deals with energy emission. If the transmitter is emitting -energy, it is not blocked in rfkill terms. - -4. It doesn't matter if the device has multiple rfkill input lines affecting -the same transmitter, their combined state is to be exported as a single state -per transmitter (see rule 1). - -This rule exists because users of the rfkill subsystem expect to get (and set, -when possible) the overall transmitter rfkill state, not of a particular rfkill -line. +The rfkill class is provided for kernel drivers to register their radio +transmitter with the kernel, provide methods for turning it on and off and, +optionally, letting the system know about hardware-disabled states that may +be implemented on the device. This code is enabled with the CONFIG_RFKILL +Kconfig option, which drivers can "select". -5. The wireless device driver MUST NOT leave the transmitter enabled during -suspend and hibernation unless: +The rfkill class code also notifies userspace of state changes, this is +achieved via uevents. It also provides some sysfs files for userspace to +check the status of radio transmitters. See the "Userspace support" section +below. - 5.1. The transmitter has to be enabled for some sort of functionality - like wake-on-wireless-packet or autonomous packed forwarding in a mesh - network, and that functionality is enabled for this suspend/hibernation - cycle. -AND +The rfkill-input code implements a basic response to rfkill buttons -- it +implements turning on/off all devices of a certain class (or all). - 5.2. The device was not on a user-requested BLOCKED state before - the suspend (i.e. the driver must NOT unblock a device, not even - to support wake-on-wireless-packet or remain in the mesh). +When the device is hard-blocked (either by a call to rfkill_set_hw_state() +or from query_hw_block) set_block() will be invoked but drivers can well +ignore the method call since they can use the return value of the function +rfkill_set_hw_state() to sync the software state instead of keeping track +of calls to set_block(). -In other words, there is absolutely no allowed scenario where a driver can -automatically take action to unblock a rfkill controller (obviously, this deals -with scenarios where soft-blocking or both soft and hard blocking is happening. -Scenarios where hardware rfkill lines are the only ones blocking the -transmitter are outside of this rule, since the wireless device driver does not -control its input hardware rfkill lines in the first place). -6. During resume, rfkill will try to restore its previous state. +The entire functionality is spread over more than one subsystem: -7. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio -until it is resumed. + * The kernel input layer generates KEY_WWAN, KEY_WLAN etc. and + SW_RFKILL_ALL -- when the user presses a button. Drivers for radio + transmitters generally do not register to the input layer, unless the + device really provides an input device (i.e. a button that has no + effect other than generating a button press event) + * The rfkill-input code hooks up to these events and switches the soft-block + of the various radio transmitters, depending on the button type. -Example of a WLAN wireless driver connected to the rfkill subsystem: --------------------------------------------------------------------- + * The rfkill drivers turn off/on their transmitters as requested. -A certain WLAN card has one input pin that causes it to block the transmitter -and makes the status of that input pin available (only for reading!) to the -kernel driver. This is a hard rfkill input line (it cannot be overridden by -the kernel driver). + * The rfkill class will generate userspace notifications (uevents) to tell + userspace what the current state is. -The card also has one PCI register that, if manipulated by the driver, causes -it to block the transmitter. This is a soft rfkill input line. -It has also a thermal protection circuitry that shuts down its transmitter if -the card overheats, and makes the status of that protection available (only for -reading!) to the kernel driver. This is also a hard rfkill input line. -If either one of these rfkill lines are active, the transmitter is blocked by -the hardware and forced offline. +3. Kernel driver guidelines -The driver should allocate and attach to its struct device *ONE* instance of -the rfkill class (there is only one transmitter). -It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if -either one of its two hard rfkill input lines are active. If the two hard -rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft -rfkill input line is active. Only if none of the rfkill input lines are -active, will it return RFKILL_STATE_UNBLOCKED. +Drivers for radio transmitters normally implement only the rfkill class. +These drivers may not unblock the transmitter based on own decisions, they +should act on information provided by the rfkill class only. -Since the device has a hardware rfkill line, it IS subject to state changes -external to rfkill. Therefore, the driver must make sure that it calls -rfkill_force_state() to keep the status always up-to-date, and it must do a -rfkill_force_state() on resume from sleep. +Platform drivers might implement input devices if the rfkill button is just +that, a button. If that button influences the hardware then you need to +implement an rfkill class instead. This also applies if the platform provides +a way to turn on/off the transmitter(s). -Every time the driver gets a notification from the card that one of its rfkill -lines changed state (polling might be needed on badly designed cards that don't -generate interrupts for such events), it recomputes the rfkill state as per -above, and calls rfkill_force_state() to update it. +During suspend/hibernation, transmitters should only be left enabled when +wake-on wlan or similar functionality requires it and the device wasn't +blocked before suspend/hibernate. Note that it may be necessary to update +the rfkill subsystem's idea of what the current state is at resume time if +the state may have changed over suspend. -The driver should implement the toggle_radio() hook, that: -1. Returns an error if one of the hardware rfkill lines are active, and the -caller asked for RFKILL_STATE_UNBLOCKED. -2. Activates the soft rfkill line if the caller asked for state -RFKILL_STATE_SOFT_BLOCKED. It should do this even if one of the hard rfkill -lines are active, effectively double-blocking the transmitter. - -3. Deactivates the soft rfkill line if none of the hardware rfkill lines are -active and the caller asked for RFKILL_STATE_UNBLOCKED. - -=============================================================================== -4: Kernel API +4. Kernel API To build a driver with rfkill subsystem support, the driver should depend on -(or select) the Kconfig symbol RFKILL; it should _not_ depend on RKFILL_INPUT. +(or select) the Kconfig symbol RFKILL. The hardware the driver talks to may be write-only (where the current state of the hardware is unknown), or read-write (where the hardware can be queried about its current state). -The rfkill class will call the get_state hook of a device every time it needs -to know the *real* current state of the hardware. This can happen often, but -it does not do any polling, so it is not enough on hardware that is subject -to state changes outside of the rfkill subsystem. - -Therefore, calling rfkill_force_state() when a state change happens is -mandatory when the device has a hardware rfkill line, or when something else -like the firmware could cause its state to be changed without going through the -rfkill class. - -Some hardware provides events when its status changes. In these cases, it is -best for the driver to not provide a get_state hook, and instead register the -rfkill class *already* with the correct status, and keep it updated using -rfkill_force_state() when it gets an event from the hardware. - -rfkill_force_state() must be used on the device resume handlers to update the -rfkill status, should there be any chance of the device status changing during -the sleep. - -There is no provision for a statically-allocated rfkill struct. You must -use rfkill_allocate() to allocate one. - -You should: - - rfkill_allocate() - - modify rfkill fields (flags, name) - - modify state to the current hardware state (THIS IS THE ONLY TIME - YOU CAN ACCESS state DIRECTLY) - - rfkill_register() +Calling rfkill_set_hw_state() when a state change happens is required from +rfkill drivers that control devices that can be hard-blocked unless they also +assign the poll_hw_block() callback (then the rfkill core will poll the +device). Don't do this unless you cannot get the event in any other way. -The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through -a suitable return of get_state() or through rfkill_force_state(). -When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch -it to a different state is through a suitable return of get_state() or through -rfkill_force_state(). -If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED -when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should -not return an error. Instead, it should try to double-block the transmitter, -so that its state will change from RFKILL_STATE_HARD_BLOCKED to -RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease. - -Please refer to the source for more documentation. - -=============================================================================== -5: Userspace support - -rfkill devices issue uevents (with an action of "change"), with the following -environment variables set: - -RFKILL_NAME -RFKILL_STATE -RFKILL_TYPE +5. Userspace support -The ABI for these variables is defined by the sysfs attributes. It is best -to take a quick look at the source to make sure of the possible values. - -It is expected that HAL will trap those, and bridge them to DBUS, etc. These -events CAN and SHOULD be used to give feedback to the user about the rfkill -status of the system. - -Input devices may issue events that are related to rfkill. These are the -various KEY_* events and SW_* events supported by rfkill-input.c. - -Userspace may not change the state of an rfkill switch in response to an -input event, it should refrain from changing states entirely. - -Userspace cannot assume it is the only source of control for rfkill switches. -Their state can change due to firmware actions, direct user actions, and the -rfkill-input EPO override for *_RFKILL_ALL. - -When rfkill-input is not active, userspace must initiate a rfkill status -change by writing to the "state" attribute in order for anything to happen. - -Take particular care to implement EV_SW SW_RFKILL_ALL properly. When that -switch is set to OFF, *every* rfkill device *MUST* be immediately put into the -RFKILL_STATE_SOFT_BLOCKED state, no questions asked. - -The following sysfs entries will be created: +The following sysfs entries exist for every rfkill device: name: Name assigned by driver to this key (interface or driver name). type: Name of the key type ("wlan", "bluetooth", etc). state: Current state of the transmitter 0: RFKILL_STATE_SOFT_BLOCKED - transmitter is forced off, but one can override it - by a write to the state attribute; + transmitter is turned off by software 1: RFKILL_STATE_UNBLOCKED - transmiter is NOT forced off, and may operate if - all other conditions for such operation are met - (such as interface is up and configured, etc); + transmiter is (potentially) active 2: RFKILL_STATE_HARD_BLOCKED transmitter is forced off by something outside of - the driver's control. One cannot set a device to - this state through writes to the state attribute; - claim: 1: Userspace handles events, 0: Kernel handles events - -Both the "state" and "claim" entries are also writable. For the "state" entry -this means that when 1 or 0 is written, the device rfkill state (if not yet in -the requested state), will be will be toggled accordingly. - -For the "claim" entry writing 1 to it means that the kernel no longer handles -key events even though RFKILL_INPUT input was enabled. When "claim" has been -set to 0, userspace should make sure that it listens for the input events or -check the sysfs "state" entry regularly to correctly perform the required tasks -when the rkfill key is pressed. - -A note about input devices and EV_SW events: - -In order to know the current state of an input device switch (like -SW_RFKILL_ALL), you will need to use an IOCTL. That information is not -available through sysfs in a generic way at this time, and it is not available -through the rfkill class AT ALL. + the driver's control. + claim: 0: Kernel handles events (currently always reads that value) + +rfkill devices also issue uevents (with an action of "change"), with the +following environment variables set: + +RFKILL_NAME +RFKILL_STATE +RFKILL_TYPE + +The contents of these variables corresponds to the "name", "state" and +"type" sysfs files explained above. diff --git a/MAINTAINERS b/MAINTAINERS index e18baa410b5..2f6a8fcfb1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4753,9 +4753,9 @@ S: Supported F: fs/reiserfs/ RFKILL -P: Ivo van Doorn -M: IvDoorn@gmail.com -L: netdev@vger.kernel.org +P: Johannes Berg +M: johannes@sipsolutions.net +L: linux-wireless@vger.kernel.org S: Maintained F Documentation/rfkill.txt F: net/rfkill/ diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c index bde42aa2937..c31e601eb49 100644 --- a/arch/arm/mach-pxa/tosa-bt.c +++ b/arch/arm/mach-pxa/tosa-bt.c @@ -35,21 +35,25 @@ static void tosa_bt_off(struct tosa_bt_data *data) gpio_set_value(data->gpio_reset, 0); } -static int tosa_bt_toggle_radio(void *data, enum rfkill_state state) +static int tosa_bt_set_block(void *data, bool blocked) { - pr_info("BT_RADIO going: %s\n", - state == RFKILL_STATE_UNBLOCKED ? "on" : "off"); + pr_info("BT_RADIO going: %s\n", blocked ? "off" : "on"); - if (state == RFKILL_STATE_UNBLOCKED) { + if (!blocked) { pr_info("TOSA_BT: going ON\n"); tosa_bt_on(data); } else { pr_info("TOSA_BT: going OFF\n"); tosa_bt_off(data); } + return 0; } +static const struct rfkill_ops tosa_bt_rfkill_ops = { + .set_block = tosa_bt_set_block, +}; + static int tosa_bt_probe(struct platform_device *dev) { int rc; @@ -70,18 +74,14 @@ static int tosa_bt_probe(struct platform_device *dev) if (rc) goto err_pwr_dir; - rfk = rfkill_allocate(&dev->dev, RFKILL_TYPE_BLUETOOTH); + rfk = rfkill_alloc("tosa-bt", &dev->dev, RFKILL_TYPE_BLUETOOTH, + &tosa_bt_rfkill_ops, data); if (!rfk) { rc = -ENOMEM; goto err_rfk_alloc; } - rfk->name = "tosa-bt"; - rfk->toggle_radio = tosa_bt_toggle_radio; - rfk->data = data; -#ifdef CONFIG_RFKILL_LEDS - rfk->led_trigger.name = "tosa-bt"; -#endif + rfkill_set_led_trigger_name(rfk, "tosa-bt"); rc = rfkill_register(rfk); if (rc) @@ -92,9 +92,7 @@ static int tosa_bt_probe(struct platform_device *dev) return 0; err_rfkill: - if (rfk) - rfkill_free(rfk); - rfk = NULL; + rfkill_destroy(rfk); err_rfk_alloc: tosa_bt_off(data); err_pwr_dir: @@ -113,8 +111,10 @@ static int __devexit tosa_bt_remove(struct platform_device *dev) platform_set_drvdata(dev, NULL); - if (rfk) + if (rfk) { rfkill_unregister(rfk); + rfkill_destroy(rfk); + } rfk = NULL; tosa_bt_off(data); diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index afac5b6d3d7..58ce807fe44 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 837135f0390..5ddd8c4f901 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2481,10 +2481,10 @@ static int add_net_device(struct hso_device *hso_dev) return 0; } -static int hso_radio_toggle(void *data, enum rfkill_state state) +static int hso_rfkill_set_block(void *data, bool blocked) { struct hso_device *hso_dev = data; - int enabled = (state == RFKILL_STATE_UNBLOCKED); + int enabled = !blocked; int rv; mutex_lock(&hso_dev->mutex); @@ -2498,6 +2498,10 @@ static int hso_radio_toggle(void *data, enum rfkill_state state) return rv; } +static const struct rfkill_ops hso_rfkill_ops = { + .set_block = hso_rfkill_set_block, +}; + /* Creates and sets up everything for rfkill */ static void hso_create_rfkill(struct hso_device *hso_dev, struct usb_interface *interface) @@ -2506,29 +2510,25 @@ static void hso_create_rfkill(struct hso_device *hso_dev, struct device *dev = &hso_net->net->dev; char *rfkn; - hso_net->rfkill = rfkill_allocate(&interface_to_usbdev(interface)->dev, - RFKILL_TYPE_WWAN); - if (!hso_net->rfkill) { - dev_err(dev, "%s - Out of memory\n", __func__); - return; - } rfkn = kzalloc(20, GFP_KERNEL); - if (!rfkn) { - rfkill_free(hso_net->rfkill); - hso_net->rfkill = NULL; + if (!rfkn) dev_err(dev, "%s - Out of memory\n", __func__); - return; - } + snprintf(rfkn, 20, "hso-%d", interface->altsetting->desc.bInterfaceNumber); - hso_net->rfkill->name = rfkn; - hso_net->rfkill->state = RFKILL_STATE_UNBLOCKED; - hso_net->rfkill->data = hso_dev; - hso_net->rfkill->toggle_radio = hso_radio_toggle; + + hso_net->rfkill = rfkill_alloc(rfkn, + &interface_to_usbdev(interface)->dev, + RFKILL_TYPE_WWAN, + &hso_rfkill_ops, hso_dev); + if (!hso_net->rfkill) { + dev_err(dev, "%s - Out of memory\n", __func__); + kfree(rfkn); + return; + } if (rfkill_register(hso_net->rfkill) < 0) { + rfkill_destroy(hso_net->rfkill); kfree(rfkn); - hso_net->rfkill->name = NULL; - rfkill_free(hso_net->rfkill); hso_net->rfkill = NULL; dev_err(dev, "%s - Failed to register rfkill\n", __func__); return; @@ -3165,8 +3165,10 @@ static void hso_free_interface(struct usb_interface *interface) hso_stop_net_device(network_table[i]); cancel_work_sync(&network_table[i]->async_put_intf); cancel_work_sync(&network_table[i]->async_get_intf); - if (rfk) + if (rfk) { rfkill_unregister(rfk); + rfkill_destroy(rfk); + } hso_free_net_device(network_table[i]); } } diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 796a3adffea..515880aa211 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -460,12 +460,9 @@ struct ath_led { bool registered; }; -/* Rfkill */ -#define ATH_RFKILL_POLL_INTERVAL 2000 /* msecs */ - struct ath_rfkill { struct rfkill *rfkill; - struct delayed_work rfkill_poll; + struct rfkill_ops ops; char rfkill_name[32]; }; @@ -509,8 +506,6 @@ struct ath_rfkill { #define SC_OP_RXFLUSH BIT(7) #define SC_OP_LED_ASSOCIATED BIT(8) #define SC_OP_RFKILL_REGISTERED BIT(9) -#define SC_OP_RFKILL_SW_BLOCKED BIT(10) -#define SC_OP_RFKILL_HW_BLOCKED BIT(11) #define SC_OP_WAIT_FOR_BEACON BIT(12) #define SC_OP_LED_ON BIT(13) #define SC_OP_SCANNING BIT(14) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 61da08a1648..f7baa406918 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1192,120 +1192,69 @@ static bool ath_is_rfkill_set(struct ath_softc *sc) ah->rfkill_polarity; } -/* h/w rfkill poll function */ -static void ath_rfkill_poll(struct work_struct *work) +/* s/w rfkill handlers */ +static int ath_rfkill_set_block(void *data, bool blocked) { - struct ath_softc *sc = container_of(work, struct ath_softc, - rf_kill.rfkill_poll.work); - bool radio_on; - - if (sc->sc_flags & SC_OP_INVALID) - return; - - radio_on = !ath_is_rfkill_set(sc); - - /* - * enable/disable radio only when there is a - * state change in RF switch - */ - if (radio_on == !!(sc->sc_flags & SC_OP_RFKILL_HW_BLOCKED)) { - enum rfkill_state state; - - if (sc->sc_flags & SC_OP_RFKILL_SW_BLOCKED) { - state = radio_on ? RFKILL_STATE_SOFT_BLOCKED - : RFKILL_STATE_HARD_BLOCKED; - } else if (radio_on) { - ath_radio_enable(sc); - state = RFKILL_STATE_UNBLOCKED; - } else { - ath_radio_disable(sc); - state = RFKILL_STATE_HARD_BLOCKED; - } - - if (state == RFKILL_STATE_HARD_BLOCKED) - sc->sc_flags |= SC_OP_RFKILL_HW_BLOCKED; - else - sc->sc_flags &= ~SC_OP_RFKILL_HW_BLOCKED; + struct ath_softc *sc = data; - rfkill_force_state(sc->rf_kill.rfkill, state); - } + if (blocked) + ath_radio_disable(sc); + else + ath_radio_enable(sc); - queue_delayed_work(sc->hw->workqueue, &sc->rf_kill.rfkill_poll, - msecs_to_jiffies(ATH_RFKILL_POLL_INTERVAL)); + return 0; } -/* s/w rfkill handler */ -static int ath_sw_toggle_radio(void *data, enum rfkill_state state) +static void ath_rfkill_poll_state(struct rfkill *rfkill, void *data) { struct ath_softc *sc = data; + bool blocked = !!ath_is_rfkill_set(sc); - switch (state) { - case RFKILL_STATE_SOFT_BLOCKED: - if (!(sc->sc_flags & (SC_OP_RFKILL_HW_BLOCKED | - SC_OP_RFKILL_SW_BLOCKED))) - ath_radio_disable(sc); - sc->sc_flags |= SC_OP_RFKILL_SW_BLOCKED; - return 0; - case RFKILL_STATE_UNBLOCKED: - if ((sc->sc_flags & SC_OP_RFKILL_SW_BLOCKED)) { - sc->sc_flags &= ~SC_OP_RFKILL_SW_BLOCKED; - if (sc->sc_flags & SC_OP_RFKILL_HW_BLOCKED) { - DPRINTF(sc, ATH_DBG_FATAL, "Can't turn on the" - "radio as it is disabled by h/w\n"); - return -EPERM; - } - ath_radio_enable(sc); - } - return 0; - default: - return -EINVAL; - } + if (rfkill_set_hw_state(rfkill, blocked)) + ath_radio_disable(sc); + else + ath_radio_enable(sc); } /* Init s/w rfkill */ static int ath_init_sw_rfkill(struct ath_softc *sc) { - sc->rf_kill.rfkill = rfkill_allocate(wiphy_dev(sc->hw->wiphy), - RFKILL_TYPE_WLAN); + sc->rf_kill.ops.set_block = ath_rfkill_set_block; + if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) + sc->rf_kill.ops.poll = ath_rfkill_poll_state; + + snprintf(sc->rf_kill.rfkill_name, sizeof(sc->rf_kill.rfkill_name), + "ath9k-%s::rfkill", wiphy_name(sc->hw->wiphy)); + + sc->rf_kill.rfkill = rfkill_alloc(sc->rf_kill.rfkill_name, + wiphy_dev(sc->hw->wiphy), + RFKILL_TYPE_WLAN, + &sc->rf_kill.ops, sc); if (!sc->rf_kill.rfkill) { DPRINTF(sc, ATH_DBG_FATAL, "Failed to allocate rfkill\n"); return -ENOMEM; } - snprintf(sc->rf_kill.rfkill_name, sizeof(sc->rf_kill.rfkill_name), - "ath9k-%s::rfkill", wiphy_name(sc->hw->wiphy)); - sc->rf_kill.rfkill->name = sc->rf_kill.rfkill_name; - sc->rf_kill.rfkill->data = sc; - sc->rf_kill.rfkill->toggle_radio = ath_sw_toggle_radio; - sc->rf_kill.rfkill->state = RFKILL_STATE_UNBLOCKED; - return 0; } /* Deinitialize rfkill */ static void ath_deinit_rfkill(struct ath_softc *sc) { - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll); - if (sc->sc_flags & SC_OP_RFKILL_REGISTERED) { rfkill_unregister(sc->rf_kill.rfkill); + rfkill_destroy(sc->rf_kill.rfkill); sc->sc_flags &= ~SC_OP_RFKILL_REGISTERED; - sc->rf_kill.rfkill = NULL; } } static int ath_start_rfkill_poll(struct ath_softc *sc) { - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - queue_delayed_work(sc->hw->workqueue, - &sc->rf_kill.rfkill_poll, 0); - if (!(sc->sc_flags & SC_OP_RFKILL_REGISTERED)) { if (rfkill_register(sc->rf_kill.rfkill)) { DPRINTF(sc, ATH_DBG_FATAL, "Unable to register rfkill\n"); - rfkill_free(sc->rf_kill.rfkill); + rfkill_destroy(sc->rf_kill.rfkill); /* Deinitialize the device */ ath_cleanup(sc); @@ -1678,10 +1627,6 @@ int ath_attach(u16 devid, struct ath_softc *sc) goto error_attach; #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) - /* Initialze h/w Rfkill */ - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - INIT_DELAYED_WORK(&sc->rf_kill.rfkill_poll, ath_rfkill_poll); - /* Initialize s/w rfkill */ error = ath_init_sw_rfkill(sc); if (error) @@ -2214,10 +2159,8 @@ static void ath9k_stop(struct ieee80211_hw *hw) } else sc->rx.rxlink = NULL; -#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll); -#endif + rfkill_pause_polling(sc->rf_kill.rfkill); + /* disable HAL and put h/w to sleep */ ath9k_hw_disable(sc->sc_ah); ath9k_hw_configpcipowersave(sc->sc_ah, 1); diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 168411d322a..ccdf20a2e9b 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -227,11 +227,6 @@ static int ath_pci_suspend(struct pci_dev *pdev, pm_message_t state) ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, 1); -#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll); -#endif - pci_save_state(pdev); pci_disable_device(pdev); pci_set_power_state(pdev, PCI_D3hot); @@ -256,16 +251,6 @@ static int ath_pci_resume(struct pci_dev *pdev) AR_GPIO_OUTPUT_MUX_AS_OUTPUT); ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, 1); -#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) - /* - * check the h/w rfkill state on resume - * and start the rfkill poll timer - */ - if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT) - queue_delayed_work(sc->hw->workqueue, - &sc->rf_kill.rfkill_poll, 0); -#endif - return 0; } diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index 21572e40b79..07a99e3faf9 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -102,7 +102,7 @@ config B43_LEDS # if it's possible. config B43_RFKILL bool - depends on B43 && (RFKILL = y || RFKILL = B43) && RFKILL_INPUT && (INPUT_POLLDEV = y || INPUT_POLLDEV = B43) + depends on B43 && (RFKILL = y || RFKILL = B43) default y # This config option automatically enables b43 HW-RNG support, diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c index 76f4c7bad8b..9a498d3fc65 100644 --- a/drivers/net/wireless/b43/leds.c +++ b/drivers/net/wireless/b43/leds.c @@ -87,7 +87,7 @@ static void b43_led_brightness_set(struct led_classdev *led_dev, } static int b43_register_led(struct b43_wldev *dev, struct b43_led *led, - const char *name, char *default_trigger, + const char *name, const char *default_trigger, u8 led_index, bool activelow) { int err; diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index cb4a8712946..1d3e40095ad 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3470,7 +3470,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) if (!!conf->radio_enabled != phy->radio_on) { if (conf->radio_enabled) { - b43_software_rfkill(dev, RFKILL_STATE_UNBLOCKED); + b43_software_rfkill(dev, false); b43info(dev->wl, "Radio turned on by software\n"); if (!dev->radio_hw_enable) { b43info(dev->wl, "The hardware RF-kill button " @@ -3478,7 +3478,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) "Press the button to turn it on.\n"); } } else { - b43_software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED); + b43_software_rfkill(dev, true); b43info(dev->wl, "Radio turned off by software\n"); } } diff --git a/drivers/net/wireless/b43/phy_a.c b/drivers/net/wireless/b43/phy_a.c index c836c077d51..816e028a262 100644 --- a/drivers/net/wireless/b43/phy_a.c +++ b/drivers/net/wireless/b43/phy_a.c @@ -480,11 +480,11 @@ static bool b43_aphy_op_supports_hwpctl(struct b43_wldev *dev) } static void b43_aphy_op_software_rfkill(struct b43_wldev *dev, - enum rfkill_state state) + bool blocked) { struct b43_phy *phy = &dev->phy; - if (state == RFKILL_STATE_UNBLOCKED) { + if (!blocked) { if (phy->radio_on) return; b43_radio_write16(dev, 0x0004, 0x00C0); diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index e176b6e0d9c..6d241622210 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -84,7 +84,7 @@ int b43_phy_init(struct b43_wldev *dev) phy->channel = ops->get_default_chan(dev); - ops->software_rfkill(dev, RFKILL_STATE_UNBLOCKED); + ops->software_rfkill(dev, false); err = ops->init(dev); if (err) { b43err(dev->wl, "PHY init failed\n"); @@ -104,7 +104,7 @@ err_phy_exit: if (ops->exit) ops->exit(dev); err_block_rf: - ops->software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED); + ops->software_rfkill(dev, true); return err; } @@ -113,7 +113,7 @@ void b43_phy_exit(struct b43_wldev *dev) { const struct b43_phy_operations *ops = dev->phy.ops; - ops->software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED); + ops->software_rfkill(dev, true); if (ops->exit) ops->exit(dev); } @@ -295,18 +295,13 @@ err_restore_cookie: return err; } -void b43_software_rfkill(struct b43_wldev *dev, enum rfkill_state state) +void b43_software_rfkill(struct b43_wldev *dev, bool blocked) { struct b43_phy *phy = &dev->phy; - if (state == RFKILL_STATE_HARD_BLOCKED) { - /* We cannot hardware-block the device */ - state = RFKILL_STATE_SOFT_BLOCKED; - } - b43_mac_suspend(dev); - phy->ops->software_rfkill(dev, state); - phy->radio_on = (state == RFKILL_STATE_UNBLOCKED); + phy->ops->software_rfkill(dev, blocked); + phy->radio_on = !blocked; b43_mac_enable(dev); } diff --git a/drivers/net/wireless/b43/phy_common.h b/drivers/net/wireless/b43/phy_common.h index b2d99101947..f4c2d79cbc8 100644 --- a/drivers/net/wireless/b43/phy_common.h +++ b/drivers/net/wireless/b43/phy_common.h @@ -159,7 +159,7 @@ struct b43_phy_operations { /* Radio */ bool (*supports_hwpctl)(struct b43_wldev *dev); - void (*software_rfkill)(struct b43_wldev *dev, enum rfkill_state state); + void (*software_rfkill)(struct b43_wldev *dev, bool blocked); void (*switch_analog)(struct b43_wldev *dev, bool on); int (*switch_channel)(struct b43_wldev *dev, unsigned int new_channel); unsigned int (*get_default_chan)(struct b43_wldev *dev); @@ -364,7 +364,7 @@ int b43_switch_channel(struct b43_wldev *dev, unsigned int new_channel); /** * b43_software_rfkill - Turn the radio ON or OFF in software. */ -void b43_software_rfkill(struct b43_wldev *dev, enum rfkill_state state); +void b43_software_rfkill(struct b43_wldev *dev, bool blocked); /** * b43_phy_txpower_check - Check TX power output. diff --git a/drivers/net/wireless/b43/phy_g.c b/drivers/net/wireless/b43/phy_g.c index e7b98f013b0..5300232449f 100644 --- a/drivers/net/wireless/b43/phy_g.c +++ b/drivers/net/wireless/b43/phy_g.c @@ -2592,7 +2592,7 @@ static bool b43_gphy_op_supports_hwpctl(struct b43_wldev *dev) } static void b43_gphy_op_software_rfkill(struct b43_wldev *dev, - enum rfkill_state state) + bool blocked) { struct b43_phy *phy = &dev->phy; struct b43_phy_g *gphy = phy->g; @@ -2600,7 +2600,7 @@ static void b43_gphy_op_software_rfkill(struct b43_wldev *dev, might_sleep(); - if (state == RFKILL_STATE_UNBLOCKED) { + if (!blocked) { /* Turn radio ON */ if (phy->radio_on) return; diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c index 58e319d6b1e..ea0d3a3a6a6 100644 --- a/drivers/net/wireless/b43/phy_lp.c +++ b/drivers/net/wireless/b43/phy_lp.c @@ -488,7 +488,7 @@ static void b43_lpphy_op_radio_write(struct b43_wldev *dev, u16 reg, u16 value) } static void b43_lpphy_op_software_rfkill(struct b43_wldev *dev, - enum rfkill_state state) + bool blocked) { //TODO } diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 8bcfda5f3f0..be7b5604947 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -579,7 +579,7 @@ static void b43_nphy_op_radio_write(struct b43_wldev *dev, u16 reg, u16 value) } static void b43_nphy_op_software_rfkill(struct b43_wldev *dev, - enum rfkill_state state) + bool blocked) {//TODO } diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c index 9e1d00bc24d..96047843cd5 100644 --- a/drivers/net/wireless/b43/rfkill.c +++ b/drivers/net/wireless/b43/rfkill.c @@ -45,12 +45,11 @@ static bool b43_is_hw_radio_enabled(struct b43_wldev *dev) } /* The poll callback for the hardware button. */ -static void b43_rfkill_poll(struct input_polled_dev *poll_dev) +static void b43_rfkill_poll(struct rfkill *rfkill, void *data) { - struct b43_wldev *dev = poll_dev->private; + struct b43_wldev *dev = data; struct b43_wl *wl = dev->wl; bool enabled; - bool report_change = 0; mutex_lock(&wl->mutex); if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED)) { @@ -60,68 +59,55 @@ static void b43_rfkill_poll(struct input_polled_dev *poll_dev) enabled = b43_is_hw_radio_enabled(dev); if (unlikely(enabled != dev->radio_hw_enable)) { dev->radio_hw_enable = enabled; - report_change = 1; b43info(wl, "Radio hardware status changed to %s\n", enabled ? "ENABLED" : "DISABLED"); + enabled = !rfkill_set_hw_state(rfkill, !enabled); + if (enabled != dev->phy.radio_on) + b43_software_rfkill(dev, !enabled); } mutex_unlock(&wl->mutex); - - /* send the radio switch event to the system - note both a key press - * and a release are required */ - if (unlikely(report_change)) { - input_report_key(poll_dev->input, KEY_WLAN, 1); - input_report_key(poll_dev->input, KEY_WLAN, 0); - } } /* Called when the RFKILL toggled in software. */ -static int b43_rfkill_soft_toggle(void *data, enum rfkill_state state) +static int b43_rfkill_soft_set(void *data, bool blocked) { struct b43_wldev *dev = data; struct b43_wl *wl = dev->wl; - int err = -EBUSY; + int err = -EINVAL; - if (!wl->rfkill.registered) - return 0; + if (WARN_ON(!wl->rfkill.registered)) + return -EINVAL; mutex_lock(&wl->mutex); + if (b43_status(dev) < B43_STAT_INITIALIZED) goto out_unlock; + + if (!dev->radio_hw_enable) + goto out_unlock; + + if (!blocked != dev->phy.radio_on) + b43_software_rfkill(dev, blocked); err = 0; - switch (state) { - case RFKILL_STATE_UNBLOCKED: - if (!dev->radio_hw_enable) { - /* No luck. We can't toggle the hardware RF-kill - * button from software. */ - err = -EBUSY; - goto out_unlock; - } - if (!dev->phy.radio_on) - b43_software_rfkill(dev, state); - break; - case RFKILL_STATE_SOFT_BLOCKED: - if (dev->phy.radio_on) - b43_software_rfkill(dev, state); - break; - default: - b43warn(wl, "Received unexpected rfkill state %d.\n", state); - break; - } out_unlock: mutex_unlock(&wl->mutex); - return err; } -char *b43_rfkill_led_name(struct b43_wldev *dev) +const char *b43_rfkill_led_name(struct b43_wldev *dev) { struct b43_rfkill *rfk = &(dev->wl->rfkill); if (!rfk->registered) return NULL; - return rfkill_get_led_name(rfk->rfkill); + return rfkill_get_led_trigger_name(rfk->rfkill); } +static const struct rfkill_ops b43_rfkill_ops = { + .set_block = b43_rfkill_soft_set, + .poll = b43_rfkill_poll, +}; + void b43_rfkill_init(struct b43_wldev *dev) { struct b43_wl *wl = dev->wl; @@ -130,65 +116,26 @@ void b43_rfkill_init(struct b43_wldev *dev) rfk->registered = 0; - rfk->rfkill = rfkill_allocate(dev->dev->dev, RFKILL_TYPE_WLAN); - if (!rfk->rfkill) - goto out_error; snprintf(rfk->name, sizeof(rfk->name), "b43-%s", wiphy_name(wl->hw->wiphy)); - rfk->rfkill->name = rfk->name; - rfk->rfkill->state = RFKILL_STATE_UNBLOCKED; - rfk->rfkill->data = dev; - rfk->rfkill->toggle_radio = b43_rfkill_soft_toggle; - - rfk->poll_dev = input_allocate_polled_device(); - if (!rfk->poll_dev) { - rfkill_free(rfk->rfkill); - goto err_freed_rfk; - } - - rfk->poll_dev->private = dev; - rfk->poll_dev->poll = b43_rfkill_poll; - rfk->poll_dev->poll_interval = 1000; /* msecs */ - rfk->poll_dev->input->name = rfk->name; - rfk->poll_dev->input->id.bustype = BUS_HOST; - rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor; - rfk->poll_dev->input->evbit[0] = BIT(EV_KEY); - set_bit(KEY_WLAN, rfk->poll_dev->input->keybit); + rfk->rfkill = rfkill_alloc(rfk->name, + dev->dev->dev, + RFKILL_TYPE_WLAN, + &b43_rfkill_ops, dev); + if (!rfk->rfkill) + goto out_error; err = rfkill_register(rfk->rfkill); if (err) - goto err_free_polldev; - -#ifdef CONFIG_RFKILL_INPUT_MODULE - /* B43 RF-kill isn't useful without the rfkill-input subsystem. - * Try to load the module. */ - err = request_module("rfkill-input"); - if (err) - b43warn(wl, "Failed to load the rfkill-input module. " - "The built-in radio LED will not work.\n"); -#endif /* CONFIG_RFKILL_INPUT */ - -#if !defined(CONFIG_RFKILL_INPUT) && !defined(CONFIG_RFKILL_INPUT_MODULE) - b43warn(wl, "The rfkill-input subsystem is not available. " - "The built-in radio LED will not work.\n"); -#endif - - err = input_register_polled_device(rfk->poll_dev); - if (err) - goto err_unreg_rfk; + goto err_free; rfk->registered = 1; return; -err_unreg_rfk: - rfkill_unregister(rfk->rfkill); -err_free_polldev: - input_free_polled_device(rfk->poll_dev); - rfk->poll_dev = NULL; -err_freed_rfk: - rfk->rfkill = NULL; -out_error: + err_free: + rfkill_destroy(rfk->rfkill); + out_error: rfk->registered = 0; b43warn(wl, "RF-kill button init failed\n"); } @@ -201,9 +148,7 @@ void b43_rfkill_exit(struct b43_wldev *dev) return; rfk->registered = 0; - input_unregister_polled_device(rfk->poll_dev); rfkill_unregister(rfk->rfkill); - input_free_polled_device(rfk->poll_dev); - rfk->poll_dev = NULL; + rfkill_destroy(rfk->rfkill); rfk->rfkill = NULL; } diff --git a/drivers/net/wireless/b43/rfkill.h b/drivers/net/wireless/b43/rfkill.h index adacf936d81..da497e01bbb 100644 --- a/drivers/net/wireless/b43/rfkill.h +++ b/drivers/net/wireless/b43/rfkill.h @@ -7,14 +7,11 @@ struct b43_wldev; #ifdef CONFIG_B43_RFKILL #include -#include struct b43_rfkill { /* The RFKILL subsystem data structure */ struct rfkill *rfkill; - /* The poll device for the RFKILL input button */ - struct input_polled_dev *poll_dev; /* Did initialization succeed? Used for freeing. */ bool registered; /* The unique name of this rfkill switch */ @@ -26,7 +23,7 @@ struct b43_rfkill { void b43_rfkill_init(struct b43_wldev *dev); void b43_rfkill_exit(struct b43_wldev *dev); -char * b43_rfkill_led_name(struct b43_wldev *dev); +const char *b43_rfkill_led_name(struct b43_wldev *dev); #else /* CONFIG_B43_RFKILL */ diff --git a/drivers/net/wireless/b43legacy/Kconfig b/drivers/net/wireless/b43legacy/Kconfig index d4f628a74bb..6893f439df7 100644 --- a/drivers/net/wireless/b43legacy/Kconfig +++ b/drivers/net/wireless/b43legacy/Kconfig @@ -47,7 +47,7 @@ config B43LEGACY_LEDS # if it's possible. config B43LEGACY_RFKILL bool - depends on B43LEGACY && (RFKILL = y || RFKILL = B43LEGACY) && RFKILL_INPUT && (INPUT_POLLDEV = y || INPUT_POLLDEV = B43LEGACY) + depends on B43LEGACY && (RFKILL = y || RFKILL = B43LEGACY) default y # This config option automatically enables b43 HW-RNG support, diff --git a/drivers/net/wireless/b43legacy/leds.c b/drivers/net/wireless/b43legacy/leds.c index 3ea55b18c70..538d3117594 100644 --- a/drivers/net/wireless/b43legacy/leds.c +++ b/drivers/net/wireless/b43legacy/leds.c @@ -86,7 +86,8 @@ static void b43legacy_led_brightness_set(struct led_classdev *led_dev, static int b43legacy_register_led(struct b43legacy_wldev *dev, struct b43legacy_led *led, - const char *name, char *default_trigger, + const char *name, + const char *default_trigger, u8 led_index, bool activelow) { int err; diff --git a/drivers/net/wireless/b43legacy/rfkill.c b/drivers/net/wireless/b43legacy/rfkill.c index 4b0c7d27a51..c6230a64505 100644 --- a/drivers/net/wireless/b43legacy/rfkill.c +++ b/drivers/net/wireless/b43legacy/rfkill.c @@ -45,12 +45,11 @@ static bool b43legacy_is_hw_radio_enabled(struct b43legacy_wldev *dev) } /* The poll callback for the hardware button. */ -static void b43legacy_rfkill_poll(struct input_polled_dev *poll_dev) +static void b43legacy_rfkill_poll(struct rfkill *rfkill, void *data) { - struct b43legacy_wldev *dev = poll_dev->private; + struct b43legacy_wldev *dev = data; struct b43legacy_wl *wl = dev->wl; bool enabled; - bool report_change = 0; mutex_lock(&wl->mutex); if (unlikely(b43legacy_status(dev) < B43legacy_STAT_INITIALIZED)) { @@ -60,71 +59,64 @@ static void b43legacy_rfkill_poll(struct input_polled_dev *poll_dev) enabled = b43legacy_is_hw_radio_enabled(dev); if (unlikely(enabled != dev->radio_hw_enable)) { dev->radio_hw_enable = enabled; - report_change = 1; b43legacyinfo(wl, "Radio hardware status changed to %s\n", enabled ? "ENABLED" : "DISABLED"); + enabled = !rfkill_set_hw_state(rfkill, !enabled); + if (enabled != dev->phy.radio_on) { + if (enabled) + b43legacy_radio_turn_on(dev); + else + b43legacy_radio_turn_off(dev, 0); + } } mutex_unlock(&wl->mutex); - - /* send the radio switch event to the system - note both a key press - * and a release are required */ - if (unlikely(report_change)) { - input_report_key(poll_dev->input, KEY_WLAN, 1); - input_report_key(poll_dev->input, KEY_WLAN, 0); - } } /* Called when the RFKILL toggled in software. * This is called without locking. */ -static int b43legacy_rfkill_soft_toggle(void *data, enum rfkill_state state) +static int b43legacy_rfkill_soft_set(void *data, bool blocked) { struct b43legacy_wldev *dev = data; struct b43legacy_wl *wl = dev->wl; - int err = -EBUSY; + int ret = -EINVAL; if (!wl->rfkill.registered) - return 0; + return -EINVAL; mutex_lock(&wl->mutex); if (b43legacy_status(dev) < B43legacy_STAT_INITIALIZED) goto out_unlock; - err = 0; - switch (state) { - case RFKILL_STATE_UNBLOCKED: - if (!dev->radio_hw_enable) { - /* No luck. We can't toggle the hardware RF-kill - * button from software. */ - err = -EBUSY; - goto out_unlock; - } - if (!dev->phy.radio_on) + + if (!dev->radio_hw_enable) + goto out_unlock; + + if (!blocked != dev->phy.radio_on) { + if (!blocked) b43legacy_radio_turn_on(dev); - break; - case RFKILL_STATE_SOFT_BLOCKED: - if (dev->phy.radio_on) + else b43legacy_radio_turn_off(dev, 0); - break; - default: - b43legacywarn(wl, "Received unexpected rfkill state %d.\n", - state); - break; } + ret = 0; out_unlock: mutex_unlock(&wl->mutex); - - return err; + return ret; } -char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev) +const char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev) { struct b43legacy_rfkill *rfk = &(dev->wl->rfkill); if (!rfk->registered) return NULL; - return rfkill_get_led_name(rfk->rfkill); + return rfkill_get_led_trigger_name(rfk->rfkill); } +static const struct rfkill_ops b43legacy_rfkill_ops = { + .set_block = b43legacy_rfkill_soft_set, + .poll = b43legacy_rfkill_poll, +}; + void b43legacy_rfkill_init(struct b43legacy_wldev *dev) { struct b43legacy_wl *wl = dev->wl; @@ -133,60 +125,25 @@ void b43legacy_rfkill_init(struct b43legacy_wldev *dev) rfk->registered = 0; - rfk->rfkill = rfkill_allocate(dev->dev->dev, RFKILL_TYPE_WLAN); - if (!rfk->rfkill) - goto out_error; snprintf(rfk->name, sizeof(rfk->name), "b43legacy-%s", wiphy_name(wl->hw->wiphy)); - rfk->rfkill->name = rfk->name; - rfk->rfkill->state = RFKILL_STATE_UNBLOCKED; - rfk->rfkill->data = dev; - rfk->rfkill->toggle_radio = b43legacy_rfkill_soft_toggle; - - rfk->poll_dev = input_allocate_polled_device(); - if (!rfk->poll_dev) { - rfkill_free(rfk->rfkill); - goto err_freed_rfk; - } - - rfk->poll_dev->private = dev; - rfk->poll_dev->poll = b43legacy_rfkill_poll; - rfk->poll_dev->poll_interval = 1000; /* msecs */ - - rfk->poll_dev->input->name = rfk->name; - rfk->poll_dev->input->id.bustype = BUS_HOST; - rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor; - rfk->poll_dev->input->evbit[0] = BIT(EV_KEY); - set_bit(KEY_WLAN, rfk->poll_dev->input->keybit); + rfk->rfkill = rfkill_alloc(rfk->name, + dev->dev->dev, + RFKILL_TYPE_WLAN, + &b43legacy_rfkill_ops, dev); + if (!rfk->rfkill) + goto out_error; err = rfkill_register(rfk->rfkill); if (err) - goto err_free_polldev; - -#ifdef CONFIG_RFKILL_INPUT_MODULE - /* B43legacy RF-kill isn't useful without the rfkill-input subsystem. - * Try to load the module. */ - err = request_module("rfkill-input"); - if (err) - b43legacywarn(wl, "Failed to load the rfkill-input module." - "The built-in radio LED will not work.\n"); -#endif /* CONFIG_RFKILL_INPUT */ - - err = input_register_polled_device(rfk->poll_dev); - if (err) - goto err_unreg_rfk; + goto err_free; rfk->registered = 1; return; -err_unreg_rfk: - rfkill_unregister(rfk->rfkill); -err_free_polldev: - input_free_polled_device(rfk->poll_dev); - rfk->poll_dev = NULL; -err_freed_rfk: - rfk->rfkill = NULL; -out_error: + err_free: + rfkill_destroy(rfk->rfkill); + out_error: rfk->registered = 0; b43legacywarn(wl, "RF-kill button init failed\n"); } @@ -199,10 +156,8 @@ void b43legacy_rfkill_exit(struct b43legacy_wldev *dev) return; rfk->registered = 0; - input_unregister_polled_device(rfk->poll_dev); rfkill_unregister(rfk->rfkill); - input_free_polled_device(rfk->poll_dev); - rfk->poll_dev = NULL; + rfkill_destroy(rfk->rfkill); rfk->rfkill = NULL; } diff --git a/drivers/net/wireless/b43legacy/rfkill.h b/drivers/net/wireless/b43legacy/rfkill.h index 11150a8032f..adffc503a6a 100644 --- a/drivers/net/wireless/b43legacy/rfkill.h +++ b/drivers/net/wireless/b43legacy/rfkill.h @@ -6,16 +6,12 @@ struct b43legacy_wldev; #ifdef CONFIG_B43LEGACY_RFKILL #include -#include -#include struct b43legacy_rfkill { /* The RFKILL subsystem data structure */ struct rfkill *rfkill; - /* The poll device for the RFKILL input button */ - struct input_polled_dev *poll_dev; /* Did initialization succeed? Used for freeing. */ bool registered; /* The unique name of this rfkill switch */ @@ -27,7 +23,7 @@ struct b43legacy_rfkill { void b43legacy_rfkill_init(struct b43legacy_wldev *dev); void b43legacy_rfkill_exit(struct b43legacy_wldev *dev); -char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev); +const char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev); #else /* CONFIG_B43LEGACY_RFKILL */ diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig index 8304f6406a1..6fe259fcfb8 100644 --- a/drivers/net/wireless/iwlwifi/Kconfig +++ b/drivers/net/wireless/iwlwifi/Kconfig @@ -5,15 +5,14 @@ config IWLWIFI select FW_LOADER select MAC80211_LEDS if IWLWIFI_LEDS select LEDS_CLASS if IWLWIFI_LEDS - select RFKILL if IWLWIFI_RFKILL config IWLWIFI_LEDS bool "Enable LED support in iwlagn and iwl3945 drivers" depends on IWLWIFI config IWLWIFI_RFKILL - bool "Enable RF kill support in iwlagn and iwl3945 drivers" - depends on IWLWIFI + def_bool y + depends on IWLWIFI && RFKILL config IWLWIFI_SPECTRUM_MEASUREMENT bool "Enable Spectrum Measurement in iwlagn driver" diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c index 65605ad44e4..13149936fd2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c +++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c @@ -36,42 +36,37 @@ #include "iwl-core.h" /* software rf-kill from user */ -static int iwl_rfkill_soft_rf_kill(void *data, enum rfkill_state state) +static int iwl_rfkill_soft_rf_kill(void *data, bool blocked) { struct iwl_priv *priv = data; - int err = 0; if (!priv->rfkill) - return 0; + return -EINVAL; if (test_bit(STATUS_EXIT_PENDING, &priv->status)) return 0; - IWL_DEBUG_RF_KILL(priv, "we received soft RFKILL set to state %d\n", state); + IWL_DEBUG_RF_KILL(priv, "received soft RFKILL: block=%d\n", blocked); + mutex_lock(&priv->mutex); - switch (state) { - case RFKILL_STATE_UNBLOCKED: - if (iwl_is_rfkill_hw(priv)) { - err = -EBUSY; - goto out_unlock; - } + if (iwl_is_rfkill_hw(priv)) + goto out_unlock; + + if (!blocked) iwl_radio_kill_sw_enable_radio(priv); - break; - case RFKILL_STATE_SOFT_BLOCKED: + else iwl_radio_kill_sw_disable_radio(priv); - break; - default: - IWL_WARN(priv, "we received unexpected RFKILL state %d\n", - state); - break; - } + out_unlock: mutex_unlock(&priv->mutex); - - return err; + return 0; } +static const struct rfkill_ops iwl_rfkill_ops = { + .set_block = iwl_rfkill_soft_rf_kill, +}; + int iwl_rfkill_init(struct iwl_priv *priv) { struct device *device = wiphy_dev(priv->hw->wiphy); @@ -80,21 +75,16 @@ int iwl_rfkill_init(struct iwl_priv *priv) BUG_ON(device == NULL); IWL_DEBUG_RF_KILL(priv, "Initializing RFKILL.\n"); - priv->rfkill = rfkill_allocate(device, RFKILL_TYPE_WLAN); + priv->rfkill = rfkill_alloc(priv->cfg->name, + device, + RFKILL_TYPE_WLAN, + &iwl_rfkill_ops, priv); if (!priv->rfkill) { IWL_ERR(priv, "Unable to allocate RFKILL device.\n"); ret = -ENOMEM; goto error; } - priv->rfkill->name = priv->cfg->name; - priv->rfkill->data = priv; - priv->rfkill->state = RFKILL_STATE_UNBLOCKED; - priv->rfkill->toggle_radio = iwl_rfkill_soft_rf_kill; - - priv->rfkill->dev.class->suspend = NULL; - priv->rfkill->dev.class->resume = NULL; - ret = rfkill_register(priv->rfkill); if (ret) { IWL_ERR(priv, "Unable to register RFKILL: %d\n", ret); @@ -102,11 +92,10 @@ int iwl_rfkill_init(struct iwl_priv *priv) } IWL_DEBUG_RF_KILL(priv, "RFKILL initialization complete.\n"); - return ret; + return 0; free_rfkill: - if (priv->rfkill != NULL) - rfkill_free(priv->rfkill); + rfkill_destroy(priv->rfkill); priv->rfkill = NULL; error: @@ -118,8 +107,10 @@ EXPORT_SYMBOL(iwl_rfkill_init); void iwl_rfkill_unregister(struct iwl_priv *priv) { - if (priv->rfkill) + if (priv->rfkill) { rfkill_unregister(priv->rfkill); + rfkill_destroy(priv->rfkill); + } priv->rfkill = NULL; } @@ -131,14 +122,10 @@ void iwl_rfkill_set_hw_state(struct iwl_priv *priv) if (!priv->rfkill) return; - if (iwl_is_rfkill_hw(priv)) { - rfkill_force_state(priv->rfkill, RFKILL_STATE_HARD_BLOCKED); - return; - } - - if (!iwl_is_rfkill_sw(priv)) - rfkill_force_state(priv->rfkill, RFKILL_STATE_UNBLOCKED); + if (rfkill_set_hw_state(priv->rfkill, + !!iwl_is_rfkill_hw(priv))) + iwl_radio_kill_sw_disable_radio(priv); else - rfkill_force_state(priv->rfkill, RFKILL_STATE_SOFT_BLOCKED); + iwl_radio_kill_sw_enable_radio(priv); } EXPORT_SYMBOL(iwl_rfkill_set_hw_state); diff --git a/drivers/net/wireless/iwmc3200wifi/rfkill.c b/drivers/net/wireless/iwmc3200wifi/rfkill.c index 4ca8b495f82..8ee2c3c09a0 100644 --- a/drivers/net/wireless/iwmc3200wifi/rfkill.c +++ b/drivers/net/wireless/iwmc3200wifi/rfkill.c @@ -25,47 +25,42 @@ #include "iwm.h" -static int iwm_rfkill_soft_toggle(void *data, enum rfkill_state state) +static int iwm_rfkill_set_block(void *data, bool blocked) { struct iwm_priv *iwm = data; - switch (state) { - case RFKILL_STATE_UNBLOCKED: + if (!blocked) { if (test_bit(IWM_RADIO_RFKILL_HW, &iwm->radio)) return -EBUSY; if (test_and_clear_bit(IWM_RADIO_RFKILL_SW, &iwm->radio) && (iwm_to_ndev(iwm)->flags & IFF_UP)) - iwm_up(iwm); - - break; - case RFKILL_STATE_SOFT_BLOCKED: + return iwm_up(iwm); + } else { if (!test_and_set_bit(IWM_RADIO_RFKILL_SW, &iwm->radio)) - iwm_down(iwm); - - break; - default: - break; + return iwm_down(iwm); } return 0; } +static const struct rfkill_ops iwm_rfkill_ops = { + .set_block = iwm_rfkill_set_block, +}; + int iwm_rfkill_init(struct iwm_priv *iwm) { int ret; - iwm->rfkill = rfkill_allocate(iwm_to_dev(iwm), RFKILL_TYPE_WLAN); + iwm->rfkill = rfkill_alloc(KBUILD_MODNAME, + iwm_to_dev(iwm), + RFKILL_TYPE_WLAN, + &iwm_rfkill_ops, iwm); if (!iwm->rfkill) { IWM_ERR(iwm, "Unable to allocate rfkill device\n"); return -ENOMEM; } - iwm->rfkill->name = KBUILD_MODNAME; - iwm->rfkill->data = iwm; - iwm->rfkill->state = RFKILL_STATE_UNBLOCKED; - iwm->rfkill->toggle_radio = iwm_rfkill_soft_toggle; - ret = rfkill_register(iwm->rfkill); if (ret) { IWM_ERR(iwm, "Failed to register rfkill device\n"); @@ -74,15 +69,15 @@ int iwm_rfkill_init(struct iwm_priv *iwm) return 0; fail: - rfkill_free(iwm->rfkill); + rfkill_destroy(iwm->rfkill); return ret; } void iwm_rfkill_exit(struct iwm_priv *iwm) { - if (iwm->rfkill) + if (iwm->rfkill) { rfkill_unregister(iwm->rfkill); - - rfkill_free(iwm->rfkill); + rfkill_destroy(iwm->rfkill); + } iwm->rfkill = NULL; } diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 284ebaca6e4..c682ac53641 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -21,7 +21,7 @@ config ACER_WMI depends on NEW_LEDS depends on BACKLIGHT_CLASS_DEVICE depends on SERIO_I8042 - depends on RFKILL + depends on RFKILL || RFKILL = n select ACPI_WMI ---help--- This is a driver for newer Acer (and Wistron) laptops. It adds @@ -60,7 +60,7 @@ config DELL_LAPTOP depends on DCDBAS depends on EXPERIMENTAL depends on BACKLIGHT_CLASS_DEVICE - depends on RFKILL + depends on RFKILL || RFKILL = n depends on POWER_SUPPLY default n ---help--- @@ -117,7 +117,7 @@ config HP_WMI tristate "HP WMI extras" depends on ACPI_WMI depends on INPUT - depends on RFKILL + depends on RFKILL || RFKILL = n help Say Y here if you want to support WMI-based hotkeys on HP laptops and to read data from WMI such as docking or ambient light sensor state. @@ -196,14 +196,13 @@ config THINKPAD_ACPI tristate "ThinkPad ACPI Laptop Extras" depends on ACPI depends on INPUT + depends on RFKILL || RFKILL = n select BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE select HWMON select NVRAM select NEW_LEDS select LEDS_CLASS - select NET - select RFKILL ---help--- This is a driver for the IBM and Lenovo ThinkPad laptops. It adds support for Fn-Fx key combinations, Bluetooth control, video @@ -338,9 +337,9 @@ config EEEPC_LAPTOP depends on ACPI depends on INPUT depends on EXPERIMENTAL + depends on RFKILL || RFKILL = n select BACKLIGHT_CLASS_DEVICE select HWMON - select RFKILL ---help--- This driver supports the Fn-Fx keys on Eee PC laptops. It also adds the ability to switch camera/wlan on/off. @@ -405,9 +404,8 @@ config ACPI_TOSHIBA tristate "Toshiba Laptop Extras" depends on ACPI depends on INPUT + depends on RFKILL || RFKILL = n select INPUT_POLLDEV - select NET - select RFKILL select BACKLIGHT_CLASS_DEVICE ---help--- This driver adds support for access to certain system settings diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 62d02b3c998..b618fa51db2 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -958,58 +958,50 @@ static void acer_rfkill_update(struct work_struct *ignored) status = get_u32(&state, ACER_CAP_WIRELESS); if (ACPI_SUCCESS(status)) - rfkill_force_state(wireless_rfkill, state ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED); + rfkill_set_sw_state(wireless_rfkill, !!state); if (has_cap(ACER_CAP_BLUETOOTH)) { status = get_u32(&state, ACER_CAP_BLUETOOTH); if (ACPI_SUCCESS(status)) - rfkill_force_state(bluetooth_rfkill, state ? - RFKILL_STATE_UNBLOCKED : - RFKILL_STATE_SOFT_BLOCKED); + rfkill_set_sw_state(bluetooth_rfkill, !!state); } schedule_delayed_work(&acer_rfkill_work, round_jiffies_relative(HZ)); } -static int acer_rfkill_set(void *data, enum rfkill_state state) +static int acer_rfkill_set(void *data, bool blocked) { acpi_status status; - u32 *cap = data; - status = set_u32((u32) (state == RFKILL_STATE_UNBLOCKED), *cap); + u32 cap = (unsigned long)data; + status = set_u32(!!blocked, cap); if (ACPI_FAILURE(status)) return -ENODEV; return 0; } -static struct rfkill * acer_rfkill_register(struct device *dev, -enum rfkill_type type, char *name, u32 cap) +static const struct rfkill_ops acer_rfkill_ops = { + .set_block = acer_rfkill_set, +}; + +static struct rfkill *acer_rfkill_register(struct device *dev, + enum rfkill_type type, + char *name, u32 cap) { int err; u32 state; - u32 *data; struct rfkill *rfkill_dev; - rfkill_dev = rfkill_allocate(dev, type); + rfkill_dev = rfkill_alloc(name, dev, type, + &acer_rfkill_ops, + (void *)(unsigned long)cap); if (!rfkill_dev) return ERR_PTR(-ENOMEM); - rfkill_dev->name = name; get_u32(&state, cap); - rfkill_dev->state = state ? RFKILL_STATE_UNBLOCKED : - RFKILL_STATE_SOFT_BLOCKED; - data = kzalloc(sizeof(u32), GFP_KERNEL); - if (!data) { - rfkill_free(rfkill_dev); - return ERR_PTR(-ENOMEM); - } - *data = cap; - rfkill_dev->data = data; - rfkill_dev->toggle_radio = acer_rfkill_set; + rfkill_set_sw_state(rfkill_dev, !state); err = rfkill_register(rfkill_dev); if (err) { - kfree(rfkill_dev->data); - rfkill_free(rfkill_dev); + rfkill_destroy(rfkill_dev); return ERR_PTR(err); } return rfkill_dev; @@ -1027,8 +1019,8 @@ static int acer_rfkill_init(struct device *dev) RFKILL_TYPE_BLUETOOTH, "acer-bluetooth", ACER_CAP_BLUETOOTH); if (IS_ERR(bluetooth_rfkill)) { - kfree(wireless_rfkill->data); rfkill_unregister(wireless_rfkill); + rfkill_destroy(wireless_rfkill); return PTR_ERR(bluetooth_rfkill); } } @@ -1041,11 +1033,13 @@ static int acer_rfkill_init(struct device *dev) static void acer_rfkill_exit(void) { cancel_delayed_work_sync(&acer_rfkill_work); - kfree(wireless_rfkill->data); + rfkill_unregister(wireless_rfkill); + rfkill_destroy(wireless_rfkill); + if (has_cap(ACER_CAP_BLUETOOTH)) { - kfree(bluetooth_rfkill->data); rfkill_unregister(bluetooth_rfkill); + rfkill_destroy(bluetooth_rfkill); } return; } diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index af9f4302117..2faf0e14f05 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -174,10 +174,11 @@ dell_send_request(struct calling_interface_buffer *buffer, int class, result[3]: NVRAM format version number */ -static int dell_rfkill_set(int radio, enum rfkill_state state) +static int dell_rfkill_set(void *data, bool blocked) { struct calling_interface_buffer buffer; - int disable = (state == RFKILL_STATE_UNBLOCKED) ? 0 : 1; + int disable = blocked ? 0 : 1; + unsigned long radio = (unsigned long)data; memset(&buffer, 0, sizeof(struct calling_interface_buffer)); buffer.input[0] = (1 | (radio<<8) | (disable << 16)); @@ -186,56 +187,24 @@ static int dell_rfkill_set(int radio, enum rfkill_state state) return 0; } -static int dell_wifi_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(1, state); -} - -static int dell_bluetooth_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(2, state); -} - -static int dell_wwan_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(3, state); -} - -static int dell_rfkill_get(int bit, enum rfkill_state *state) +static void dell_rfkill_query(struct rfkill *rfkill, void *data) { struct calling_interface_buffer buffer; int status; - int new_state = RFKILL_STATE_HARD_BLOCKED; + int bit = (unsigned long)data + 16; memset(&buffer, 0, sizeof(struct calling_interface_buffer)); dell_send_request(&buffer, 17, 11); status = buffer.output[1]; - if (status & (1<<16)) - new_state = RFKILL_STATE_SOFT_BLOCKED; - - if (status & (1<name = "dell-wifi"; - wifi_rfkill->toggle_radio = dell_wifi_set; - wifi_rfkill->get_state = dell_wifi_get; + } ret = rfkill_register(wifi_rfkill); if (ret) goto err_wifi; } if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) { - bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH); - if (!bluetooth_rfkill) + bluetooth_rfkill = rfkill_alloc("dell-bluetooth", NULL, + RFKILL_TYPE_BLUETOOTH, + &dell_rfkill_ops, (void *) 2); + if (!bluetooth_rfkill) { + ret = -ENOMEM; goto err_bluetooth; - bluetooth_rfkill->name = "dell-bluetooth"; - bluetooth_rfkill->toggle_radio = dell_bluetooth_set; - bluetooth_rfkill->get_state = dell_bluetooth_get; + } ret = rfkill_register(bluetooth_rfkill); if (ret) goto err_bluetooth; } if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) { - wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN); - if (!wwan_rfkill) + wwan_rfkill = rfkill_alloc("dell-wwan", NULL, RFKILL_TYPE_WWAN, + &dell_rfkill_ops, (void *) 3); + if (!wwan_rfkill) { + ret = -ENOMEM; goto err_wwan; - wwan_rfkill->name = "dell-wwan"; - wwan_rfkill->toggle_radio = dell_wwan_set; - wwan_rfkill->get_state = dell_wwan_get; + } ret = rfkill_register(wwan_rfkill); if (ret) goto err_wwan; @@ -285,22 +255,15 @@ static int dell_setup_rfkill(void) return 0; err_wwan: - if (wwan_rfkill) - rfkill_free(wwan_rfkill); - if (bluetooth_rfkill) { + rfkill_destroy(wwan_rfkill); + if (bluetooth_rfkill) rfkill_unregister(bluetooth_rfkill); - bluetooth_rfkill = NULL; - } err_bluetooth: - if (bluetooth_rfkill) - rfkill_free(bluetooth_rfkill); - if (wifi_rfkill) { + rfkill_destroy(bluetooth_rfkill); + if (wifi_rfkill) rfkill_unregister(wifi_rfkill); - wifi_rfkill = NULL; - } err_wifi: - if (wifi_rfkill) - rfkill_free(wifi_rfkill); + rfkill_destroy(wifi_rfkill); return ret; } diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 353a898c369..1208d0cedd1 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -299,39 +299,22 @@ static int update_bl_status(struct backlight_device *bd) * Rfkill helpers */ -static int eeepc_wlan_rfkill_set(void *data, enum rfkill_state state) -{ - if (state == RFKILL_STATE_SOFT_BLOCKED) - return set_acpi(CM_ASL_WLAN, 0); - else - return set_acpi(CM_ASL_WLAN, 1); -} - -static int eeepc_wlan_rfkill_state(void *data, enum rfkill_state *state) +static bool eeepc_wlan_rfkill_blocked(void) { if (get_acpi(CM_ASL_WLAN) == 1) - *state = RFKILL_STATE_UNBLOCKED; - else - *state = RFKILL_STATE_SOFT_BLOCKED; - return 0; + return false; + return true; } -static int eeepc_bluetooth_rfkill_set(void *data, enum rfkill_state state) +static int eeepc_rfkill_set(void *data, bool blocked) { - if (state == RFKILL_STATE_SOFT_BLOCKED) - return set_acpi(CM_ASL_BLUETOOTH, 0); - else - return set_acpi(CM_ASL_BLUETOOTH, 1); + unsigned long asl = (unsigned long)data; + return set_acpi(asl, !blocked); } -static int eeepc_bluetooth_rfkill_state(void *data, enum rfkill_state *state) -{ - if (get_acpi(CM_ASL_BLUETOOTH) == 1) - *state = RFKILL_STATE_UNBLOCKED; - else - *state = RFKILL_STATE_SOFT_BLOCKED; - return 0; -} +static const struct rfkill_ops eeepc_rfkill_ops = { + .set_block = eeepc_rfkill_set, +}; /* * Sys helpers @@ -531,9 +514,9 @@ static int notify_brn(void) static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) { - enum rfkill_state state; struct pci_dev *dev; struct pci_bus *bus = pci_find_bus(0, 1); + bool blocked; if (event != ACPI_NOTIFY_BUS_CHECK) return; @@ -543,9 +526,8 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) return; } - eeepc_wlan_rfkill_state(ehotk->eeepc_wlan_rfkill, &state); - - if (state == RFKILL_STATE_UNBLOCKED) { + blocked = eeepc_wlan_rfkill_blocked(); + if (!blocked) { dev = pci_get_slot(bus, 0); if (dev) { /* Device already present */ @@ -566,7 +548,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) } } - rfkill_force_state(ehotk->eeepc_wlan_rfkill, state); + rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill, blocked); } static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) @@ -684,26 +666,17 @@ static int eeepc_hotk_add(struct acpi_device *device) eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); if (get_acpi(CM_ASL_WLAN) != -1) { - ehotk->eeepc_wlan_rfkill = rfkill_allocate(&device->dev, - RFKILL_TYPE_WLAN); + ehotk->eeepc_wlan_rfkill = rfkill_alloc("eeepc-wlan", + &device->dev, + RFKILL_TYPE_WLAN, + &eeepc_rfkill_ops, + (void *)CM_ASL_WLAN); if (!ehotk->eeepc_wlan_rfkill) goto wlan_fail; - ehotk->eeepc_wlan_rfkill->name = "eeepc-wlan"; - ehotk->eeepc_wlan_rfkill->toggle_radio = eeepc_wlan_rfkill_set; - ehotk->eeepc_wlan_rfkill->get_state = eeepc_wlan_rfkill_state; - if (get_acpi(CM_ASL_WLAN) == 1) { - ehotk->eeepc_wlan_rfkill->state = - RFKILL_STATE_UNBLOCKED; - rfkill_set_default(RFKILL_TYPE_WLAN, - RFKILL_STATE_UNBLOCKED); - } else { - ehotk->eeepc_wlan_rfkill->state = - RFKILL_STATE_SOFT_BLOCKED; - rfkill_set_default(RFKILL_TYPE_WLAN, - RFKILL_STATE_SOFT_BLOCKED); - } + rfkill_set_global_sw_state(RFKILL_TYPE_WLAN, + get_acpi(CM_ASL_WLAN) != 1); result = rfkill_register(ehotk->eeepc_wlan_rfkill); if (result) goto wlan_fail; @@ -711,28 +684,17 @@ static int eeepc_hotk_add(struct acpi_device *device) if (get_acpi(CM_ASL_BLUETOOTH) != -1) { ehotk->eeepc_bluetooth_rfkill = - rfkill_allocate(&device->dev, RFKILL_TYPE_BLUETOOTH); + rfkill_alloc("eeepc-bluetooth", + &device->dev, + RFKILL_TYPE_BLUETOOTH, + &eeepc_rfkill_ops, + (void *)CM_ASL_BLUETOOTH); if (!ehotk->eeepc_bluetooth_rfkill) goto bluetooth_fail; - ehotk->eeepc_bluetooth_rfkill->name = "eeepc-bluetooth"; - ehotk->eeepc_bluetooth_rfkill->toggle_radio = - eeepc_bluetooth_rfkill_set; - ehotk->eeepc_bluetooth_rfkill->get_state = - eeepc_bluetooth_rfkill_state; - if (get_acpi(CM_ASL_BLUETOOTH) == 1) { - ehotk->eeepc_bluetooth_rfkill->state = - RFKILL_STATE_UNBLOCKED; - rfkill_set_default(RFKILL_TYPE_BLUETOOTH, - RFKILL_STATE_UNBLOCKED); - } else { - ehotk->eeepc_bluetooth_rfkill->state = - RFKILL_STATE_SOFT_BLOCKED; - rfkill_set_default(RFKILL_TYPE_BLUETOOTH, - RFKILL_STATE_SOFT_BLOCKED); - } - + rfkill_set_global_sw_state(RFKILL_TYPE_BLUETOOTH, + get_acpi(CM_ASL_BLUETOOTH) != 1); result = rfkill_register(ehotk->eeepc_bluetooth_rfkill); if (result) goto bluetooth_fail; @@ -741,13 +703,10 @@ static int eeepc_hotk_add(struct acpi_device *device) return 0; bluetooth_fail: - if (ehotk->eeepc_bluetooth_rfkill) - rfkill_free(ehotk->eeepc_bluetooth_rfkill); + rfkill_destroy(ehotk->eeepc_bluetooth_rfkill); rfkill_unregister(ehotk->eeepc_wlan_rfkill); - ehotk->eeepc_wlan_rfkill = NULL; wlan_fail: - if (ehotk->eeepc_wlan_rfkill) - rfkill_free(ehotk->eeepc_wlan_rfkill); + rfkill_destroy(ehotk->eeepc_wlan_rfkill); eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6"); eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7"); ehotk_fail: diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index fe171fad12c..8d931145cbf 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -154,58 +154,46 @@ static int hp_wmi_dock_state(void) return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0); } -static int hp_wmi_wifi_set(void *data, enum rfkill_state state) +static int hp_wmi_set_block(void *data, bool blocked) { - if (state) - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101); - else - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100); -} + unsigned long b = (unsigned long) data; + int query = BIT(b + 8) | ((!!blocked) << b); -static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state) -{ - if (state) - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202); - else - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200); + return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, query); } -static int hp_wmi_wwan_set(void *data, enum rfkill_state state) -{ - if (state) - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404); - else - return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400); -} +static const struct rfkill_ops hp_wmi_rfkill_ops = { + .set_block = hp_wmi_set_block, +}; -static int hp_wmi_wifi_state(void) +static bool hp_wmi_wifi_state(void) { int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); if (wireless & 0x100) - return RFKILL_STATE_UNBLOCKED; + return false; else - return RFKILL_STATE_SOFT_BLOCKED; + return true; } -static int hp_wmi_bluetooth_state(void) +static bool hp_wmi_bluetooth_state(void) { int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); if (wireless & 0x10000) - return RFKILL_STATE_UNBLOCKED; + return false; else - return RFKILL_STATE_SOFT_BLOCKED; + return true; } -static int hp_wmi_wwan_state(void) +static bool hp_wmi_wwan_state(void) { int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0); if (wireless & 0x1000000) - return RFKILL_STATE_UNBLOCKED; + return false; else - return RFKILL_STATE_SOFT_BLOCKED; + return true; } static ssize_t show_display(struct device *dev, struct device_attribute *attr, @@ -347,14 +335,14 @@ static void hp_wmi_notify(u32 value, void *context) } } else if (eventcode == 0x5) { if (wifi_rfkill) - rfkill_force_state(wifi_rfkill, - hp_wmi_wifi_state()); + rfkill_set_sw_state(wifi_rfkill, + hp_wmi_wifi_state()); if (bluetooth_rfkill) - rfkill_force_state(bluetooth_rfkill, - hp_wmi_bluetooth_state()); + rfkill_set_sw_state(bluetooth_rfkill, + hp_wmi_bluetooth_state()); if (wwan_rfkill) - rfkill_force_state(wwan_rfkill, - hp_wmi_wwan_state()); + rfkill_set_sw_state(wwan_rfkill, + hp_wmi_wwan_state()); } else printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n", eventcode); @@ -430,31 +418,34 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) goto add_sysfs_error; if (wireless & 0x1) { - wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN); - wifi_rfkill->name = "hp-wifi"; - wifi_rfkill->state = hp_wmi_wifi_state(); - wifi_rfkill->toggle_radio = hp_wmi_wifi_set; + wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev, + RFKILL_TYPE_WLAN, + &hp_wmi_rfkill_ops, + (void *) 0); + rfkill_set_sw_state(wifi_rfkill, hp_wmi_wifi_state()); err = rfkill_register(wifi_rfkill); if (err) - goto add_sysfs_error; + goto register_wifi_error; } if (wireless & 0x2) { - bluetooth_rfkill = rfkill_allocate(&device->dev, - RFKILL_TYPE_BLUETOOTH); - bluetooth_rfkill->name = "hp-bluetooth"; - bluetooth_rfkill->state = hp_wmi_bluetooth_state(); - bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set; + bluetooth_rfkill = rfkill_alloc("hp-bluetooth", &device->dev, + RFKILL_TYPE_BLUETOOTH, + &hp_wmi_rfkill_ops, + (void *) 1); + rfkill_set_sw_state(bluetooth_rfkill, + hp_wmi_bluetooth_state()); err = rfkill_register(bluetooth_rfkill); if (err) goto register_bluetooth_error; } if (wireless & 0x4) { - wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WWAN); - wwan_rfkill->name = "hp-wwan"; - wwan_rfkill->state = hp_wmi_wwan_state(); - wwan_rfkill->toggle_radio = hp_wmi_wwan_set; + wwan_rfkill = rfkill_alloc("hp-wwan", &device->dev, + RFKILL_TYPE_WWAN, + &hp_wmi_rfkill_ops, + (void *) 2); + rfkill_set_sw_state(wwan_rfkill, hp_wmi_wwan_state()); err = rfkill_register(wwan_rfkill); if (err) goto register_wwan_err; @@ -462,11 +453,15 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) return 0; register_wwan_err: + rfkill_destroy(wwan_rfkill); if (bluetooth_rfkill) rfkill_unregister(bluetooth_rfkill); register_bluetooth_error: + rfkill_destroy(bluetooth_rfkill); if (wifi_rfkill) rfkill_unregister(wifi_rfkill); +register_wifi_error: + rfkill_destroy(wifi_rfkill); add_sysfs_error: cleanup_sysfs(device); return err; @@ -476,12 +471,18 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device) { cleanup_sysfs(device); - if (wifi_rfkill) + if (wifi_rfkill) { rfkill_unregister(wifi_rfkill); - if (bluetooth_rfkill) + rfkill_destroy(wifi_rfkill); + } + if (bluetooth_rfkill) { rfkill_unregister(bluetooth_rfkill); - if (wwan_rfkill) + rfkill_destroy(wifi_rfkill); + } + if (wwan_rfkill) { rfkill_unregister(wwan_rfkill); + rfkill_destroy(wwan_rfkill); + } return 0; } diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index f1963b05175..aec0b27fd77 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -128,11 +128,11 @@ enum sony_nc_rfkill { SONY_BLUETOOTH, SONY_WWAN, SONY_WIMAX, - SONY_RFKILL_MAX, + N_SONY_RFKILL, }; -static struct rfkill *sony_rfkill_devices[SONY_RFKILL_MAX]; -static int sony_rfkill_address[SONY_RFKILL_MAX] = {0x300, 0x500, 0x700, 0x900}; +static struct rfkill *sony_rfkill_devices[N_SONY_RFKILL]; +static int sony_rfkill_address[N_SONY_RFKILL] = {0x300, 0x500, 0x700, 0x900}; static void sony_nc_rfkill_update(void); /*********** Input Devices ***********/ @@ -1051,147 +1051,98 @@ static void sony_nc_rfkill_cleanup(void) { int i; - for (i = 0; i < SONY_RFKILL_MAX; i++) { - if (sony_rfkill_devices[i]) + for (i = 0; i < N_SONY_RFKILL; i++) { + if (sony_rfkill_devices[i]) { rfkill_unregister(sony_rfkill_devices[i]); + rfkill_destroy(sony_rfkill_devices[i]); + } } } -static int sony_nc_rfkill_get(void *data, enum rfkill_state *state) -{ - int result; - int argument = sony_rfkill_address[(long) data]; - - sony_call_snc_handle(0x124, 0x200, &result); - if (result & 0x1) { - sony_call_snc_handle(0x124, argument, &result); - if (result & 0xf) - *state = RFKILL_STATE_UNBLOCKED; - else - *state = RFKILL_STATE_SOFT_BLOCKED; - } else { - *state = RFKILL_STATE_HARD_BLOCKED; - } - - return 0; -} - -static int sony_nc_rfkill_set(void *data, enum rfkill_state state) +static int sony_nc_rfkill_set(void *data, bool blocked) { int result; int argument = sony_rfkill_address[(long) data] + 0x100; - if (state == RFKILL_STATE_UNBLOCKED) + if (!blocked) argument |= 0xff0000; return sony_call_snc_handle(0x124, argument, &result); } -static int sony_nc_setup_wifi_rfkill(struct acpi_device *device) -{ - int err = 0; - struct rfkill *sony_wifi_rfkill; - - sony_wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN); - if (!sony_wifi_rfkill) - return -1; - sony_wifi_rfkill->name = "sony-wifi"; - sony_wifi_rfkill->toggle_radio = sony_nc_rfkill_set; - sony_wifi_rfkill->get_state = sony_nc_rfkill_get; - sony_wifi_rfkill->data = (void *)SONY_WIFI; - err = rfkill_register(sony_wifi_rfkill); - if (err) - rfkill_free(sony_wifi_rfkill); - else { - sony_rfkill_devices[SONY_WIFI] = sony_wifi_rfkill; - sony_nc_rfkill_set(sony_wifi_rfkill->data, - RFKILL_STATE_UNBLOCKED); - } - return err; -} +static const struct rfkill_ops sony_rfkill_ops = { + .set_block = sony_nc_rfkill_set, +}; -static int sony_nc_setup_bluetooth_rfkill(struct acpi_device *device) +static int sony_nc_setup_rfkill(struct acpi_device *device, + enum sony_nc_rfkill nc_type) { int err = 0; - struct rfkill *sony_bluetooth_rfkill; - - sony_bluetooth_rfkill = rfkill_allocate(&device->dev, - RFKILL_TYPE_BLUETOOTH); - if (!sony_bluetooth_rfkill) - return -1; - sony_bluetooth_rfkill->name = "sony-bluetooth"; - sony_bluetooth_rfkill->toggle_radio = sony_nc_rfkill_set; - sony_bluetooth_rfkill->get_state = sony_nc_rfkill_get; - sony_bluetooth_rfkill->data = (void *)SONY_BLUETOOTH; - err = rfkill_register(sony_bluetooth_rfkill); - if (err) - rfkill_free(sony_bluetooth_rfkill); - else { - sony_rfkill_devices[SONY_BLUETOOTH] = sony_bluetooth_rfkill; - sony_nc_rfkill_set(sony_bluetooth_rfkill->data, - RFKILL_STATE_UNBLOCKED); + struct rfkill *rfk; + enum rfkill_type type; + const char *name; + + switch (nc_type) { + case SONY_WIFI: + type = RFKILL_TYPE_WLAN; + name = "sony-wifi"; + break; + case SONY_BLUETOOTH: + type = RFKILL_TYPE_BLUETOOTH; + name = "sony-bluetooth"; + break; + case SONY_WWAN: + type = RFKILL_TYPE_WWAN; + name = "sony-wwan"; + break; + case SONY_WIMAX: + type = RFKILL_TYPE_WIMAX; + name = "sony-wimax"; + break; + default: + return -EINVAL; } - return err; -} -static int sony_nc_setup_wwan_rfkill(struct acpi_device *device) -{ - int err = 0; - struct rfkill *sony_wwan_rfkill; + rfk = rfkill_alloc(name, &device->dev, type, + &sony_rfkill_ops, (void *)nc_type); + if (!rfk) + return -ENOMEM; - sony_wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WWAN); - if (!sony_wwan_rfkill) - return -1; - sony_wwan_rfkill->name = "sony-wwan"; - sony_wwan_rfkill->toggle_radio = sony_nc_rfkill_set; - sony_wwan_rfkill->get_state = sony_nc_rfkill_get; - sony_wwan_rfkill->data = (void *)SONY_WWAN; - err = rfkill_register(sony_wwan_rfkill); - if (err) - rfkill_free(sony_wwan_rfkill); - else { - sony_rfkill_devices[SONY_WWAN] = sony_wwan_rfkill; - sony_nc_rfkill_set(sony_wwan_rfkill->data, - RFKILL_STATE_UNBLOCKED); + err = rfkill_register(rfk); + if (err) { + rfkill_destroy(rfk); + return err; } + sony_rfkill_devices[nc_type] = rfk; + sony_nc_rfkill_set((void *)nc_type, false); return err; } -static int sony_nc_setup_wimax_rfkill(struct acpi_device *device) +static void sony_nc_rfkill_update() { - int err = 0; - struct rfkill *sony_wimax_rfkill; + enum sony_nc_rfkill i; + int result; + bool hwblock; - sony_wimax_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX); - if (!sony_wimax_rfkill) - return -1; - sony_wimax_rfkill->name = "sony-wimax"; - sony_wimax_rfkill->toggle_radio = sony_nc_rfkill_set; - sony_wimax_rfkill->get_state = sony_nc_rfkill_get; - sony_wimax_rfkill->data = (void *)SONY_WIMAX; - err = rfkill_register(sony_wimax_rfkill); - if (err) - rfkill_free(sony_wimax_rfkill); - else { - sony_rfkill_devices[SONY_WIMAX] = sony_wimax_rfkill; - sony_nc_rfkill_set(sony_wimax_rfkill->data, - RFKILL_STATE_UNBLOCKED); - } - return err; -} + sony_call_snc_handle(0x124, 0x200, &result); + hwblock = !(result & 0x1); -static void sony_nc_rfkill_update() -{ - int i; - enum rfkill_state state; + for (i = 0; i < N_SONY_RFKILL; i++) { + int argument = sony_rfkill_address[i]; - for (i = 0; i < SONY_RFKILL_MAX; i++) { - if (sony_rfkill_devices[i]) { - sony_rfkill_devices[i]-> - get_state(sony_rfkill_devices[i]->data, - &state); - rfkill_force_state(sony_rfkill_devices[i], state); + if (!sony_rfkill_devices[i]) + continue; + + if (hwblock) { + if (rfkill_set_hw_state(sony_rfkill_devices[i], true)) + sony_nc_rfkill_set(sony_rfkill_devices[i], + true); + continue; } + + sony_call_snc_handle(0x124, argument, &result); + rfkill_set_states(sony_rfkill_devices[i], + !(result & 0xf), false); } } @@ -1210,13 +1161,13 @@ static int sony_nc_rfkill_setup(struct acpi_device *device) } if (result & 0x1) - sony_nc_setup_wifi_rfkill(device); + sony_nc_setup_rfkill(device, SONY_WIFI); if (result & 0x2) - sony_nc_setup_bluetooth_rfkill(device); + sony_nc_setup_rfkill(device, SONY_BLUETOOTH); if (result & 0x1c) - sony_nc_setup_wwan_rfkill(device); + sony_nc_setup_rfkill(device, SONY_WWAN); if (result & 0x20) - sony_nc_setup_wimax_rfkill(device); + sony_nc_setup_rfkill(device, SONY_WIMAX); return 0; } diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 912be65b626..cfcafa4e947 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -166,13 +166,6 @@ enum { #define TPACPI_MAX_ACPI_ARGS 3 -/* rfkill switches */ -enum { - TPACPI_RFK_BLUETOOTH_SW_ID = 0, - TPACPI_RFK_WWAN_SW_ID, - TPACPI_RFK_UWB_SW_ID, -}; - /* printk headers */ #define TPACPI_LOG TPACPI_FILE ": " #define TPACPI_EMERG KERN_EMERG TPACPI_LOG @@ -1005,67 +998,237 @@ static int __init tpacpi_check_std_acpi_brightness_support(void) return 0; } -static int __init tpacpi_new_rfkill(const unsigned int id, - struct rfkill **rfk, +static void printk_deprecated_attribute(const char * const what, + const char * const details) +{ + tpacpi_log_usertask("deprecated sysfs attribute"); + printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and " + "will be removed. %s\n", + what, details); +} + +/************************************************************************* + * rfkill and radio control support helpers + */ + +/* + * ThinkPad-ACPI firmware handling model: + * + * WLSW (master wireless switch) is event-driven, and is common to all + * firmware-controlled radios. It cannot be controlled, just monitored, + * as expected. It overrides all radio state in firmware + * + * The kernel, a masked-off hotkey, and WLSW can change the radio state + * (TODO: verify how WLSW interacts with the returned radio state). + * + * The only time there are shadow radio state changes, is when + * masked-off hotkeys are used. + */ + +/* + * Internal driver API for radio state: + * + * int: < 0 = error, otherwise enum tpacpi_rfkill_state + * bool: true means radio blocked (off) + */ +enum tpacpi_rfkill_state { + TPACPI_RFK_RADIO_OFF = 0, + TPACPI_RFK_RADIO_ON +}; + +/* rfkill switches */ +enum tpacpi_rfk_id { + TPACPI_RFK_BLUETOOTH_SW_ID = 0, + TPACPI_RFK_WWAN_SW_ID, + TPACPI_RFK_UWB_SW_ID, + TPACPI_RFK_SW_MAX +}; + +static const char *tpacpi_rfkill_names[] = { + [TPACPI_RFK_BLUETOOTH_SW_ID] = "bluetooth", + [TPACPI_RFK_WWAN_SW_ID] = "wwan", + [TPACPI_RFK_UWB_SW_ID] = "uwb", + [TPACPI_RFK_SW_MAX] = NULL +}; + +/* ThinkPad-ACPI rfkill subdriver */ +struct tpacpi_rfk { + struct rfkill *rfkill; + enum tpacpi_rfk_id id; + const struct tpacpi_rfk_ops *ops; +}; + +struct tpacpi_rfk_ops { + /* firmware interface */ + int (*get_status)(void); + int (*set_status)(const enum tpacpi_rfkill_state); +}; + +static struct tpacpi_rfk *tpacpi_rfkill_switches[TPACPI_RFK_SW_MAX]; + +/* Query FW and update rfkill sw state for a given rfkill switch */ +static int tpacpi_rfk_update_swstate(const struct tpacpi_rfk *tp_rfk) +{ + int status; + + if (!tp_rfk) + return -ENODEV; + + status = (tp_rfk->ops->get_status)(); + if (status < 0) + return status; + + rfkill_set_sw_state(tp_rfk->rfkill, + (status == TPACPI_RFK_RADIO_OFF)); + + return status; +} + +/* Query FW and update rfkill sw state for all rfkill switches */ +static void tpacpi_rfk_update_swstate_all(void) +{ + unsigned int i; + + for (i = 0; i < TPACPI_RFK_SW_MAX; i++) + tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]); +} + +/* + * Sync the HW-blocking state of all rfkill switches, + * do notice it causes the rfkill core to schedule uevents + */ +static void tpacpi_rfk_update_hwblock_state(bool blocked) +{ + unsigned int i; + struct tpacpi_rfk *tp_rfk; + + for (i = 0; i < TPACPI_RFK_SW_MAX; i++) { + tp_rfk = tpacpi_rfkill_switches[i]; + if (tp_rfk) { + if (rfkill_set_hw_state(tp_rfk->rfkill, + blocked)) { + /* ignore -- we track sw block */ + } + } + } +} + +/* Call to get the WLSW state from the firmware */ +static int hotkey_get_wlsw(void); + +/* Call to query WLSW state and update all rfkill switches */ +static bool tpacpi_rfk_check_hwblock_state(void) +{ + int res = hotkey_get_wlsw(); + int hw_blocked; + + /* When unknown or unsupported, we have to assume it is unblocked */ + if (res < 0) + return false; + + hw_blocked = (res == TPACPI_RFK_RADIO_OFF); + tpacpi_rfk_update_hwblock_state(hw_blocked); + + return hw_blocked; +} + +static int tpacpi_rfk_hook_set_block(void *data, bool blocked) +{ + struct tpacpi_rfk *tp_rfk = data; + int res; + + dbg_printk(TPACPI_DBG_RFKILL, + "request to change radio state to %s\n", + blocked ? "blocked" : "unblocked"); + + /* try to set radio state */ + res = (tp_rfk->ops->set_status)(blocked ? + TPACPI_RFK_RADIO_OFF : TPACPI_RFK_RADIO_ON); + + /* and update the rfkill core with whatever the FW really did */ + tpacpi_rfk_update_swstate(tp_rfk); + + return (res < 0) ? res : 0; +} + +static const struct rfkill_ops tpacpi_rfk_rfkill_ops = { + .set_block = tpacpi_rfk_hook_set_block, +}; + +static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, + const struct tpacpi_rfk_ops *tp_rfkops, const enum rfkill_type rfktype, const char *name, - const bool set_default, - int (*toggle_radio)(void *, enum rfkill_state), - int (*get_state)(void *, enum rfkill_state *)) + const bool set_default) { + struct tpacpi_rfk *atp_rfk; int res; - enum rfkill_state initial_state = RFKILL_STATE_SOFT_BLOCKED; + bool initial_sw_state = false; + int initial_sw_status; - res = get_state(NULL, &initial_state); - if (res < 0) { + BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]); + + initial_sw_status = (tp_rfkops->get_status)(); + if (initial_sw_status < 0) { printk(TPACPI_ERR "failed to read initial state for %s, error %d; " - "will turn radio off\n", name, res); - } else if (set_default) { - /* try to set the initial state as the default for the rfkill - * type, since we ask the firmware to preserve it across S5 in - * NVRAM */ - if (rfkill_set_default(rfktype, - (initial_state == RFKILL_STATE_UNBLOCKED) ? - RFKILL_STATE_UNBLOCKED : - RFKILL_STATE_SOFT_BLOCKED) == -EPERM) - vdbg_printk(TPACPI_DBG_RFKILL, - "Default state for %s cannot be changed\n", - name); - } - - *rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype); - if (!*rfk) { + "will turn radio off\n", name, initial_sw_status); + } else { + initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF); + if (set_default) { + /* try to set the initial state as the default for the + * rfkill type, since we ask the firmware to preserve + * it across S5 in NVRAM */ + rfkill_set_global_sw_state(rfktype, initial_sw_state); + } + } + + atp_rfk = kzalloc(sizeof(struct tpacpi_rfk), GFP_KERNEL); + if (atp_rfk) + atp_rfk->rfkill = rfkill_alloc(name, + &tpacpi_pdev->dev, + rfktype, + &tpacpi_rfk_rfkill_ops, + atp_rfk); + if (!atp_rfk || !atp_rfk->rfkill) { printk(TPACPI_ERR "failed to allocate memory for rfkill class\n"); + kfree(atp_rfk); return -ENOMEM; } - (*rfk)->name = name; - (*rfk)->get_state = get_state; - (*rfk)->toggle_radio = toggle_radio; - (*rfk)->state = initial_state; + atp_rfk->id = id; + atp_rfk->ops = tp_rfkops; + + rfkill_set_states(atp_rfk->rfkill, initial_sw_state, + tpacpi_rfk_check_hwblock_state()); - res = rfkill_register(*rfk); + res = rfkill_register(atp_rfk->rfkill); if (res < 0) { printk(TPACPI_ERR "failed to register %s rfkill switch: %d\n", name, res); - rfkill_free(*rfk); - *rfk = NULL; + rfkill_destroy(atp_rfk->rfkill); + kfree(atp_rfk); return res; } + tpacpi_rfkill_switches[id] = atp_rfk; return 0; } -static void printk_deprecated_attribute(const char * const what, - const char * const details) +static void tpacpi_destroy_rfkill(const enum tpacpi_rfk_id id) { - tpacpi_log_usertask("deprecated sysfs attribute"); - printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and " - "will be removed. %s\n", - what, details); + struct tpacpi_rfk *tp_rfk; + + BUG_ON(id >= TPACPI_RFK_SW_MAX); + + tp_rfk = tpacpi_rfkill_switches[id]; + if (tp_rfk) { + rfkill_unregister(tp_rfk->rfkill); + tpacpi_rfkill_switches[id] = NULL; + kfree(tp_rfk); + } } static void printk_deprecated_rfkill_attribute(const char * const what) @@ -1074,6 +1237,112 @@ static void printk_deprecated_rfkill_attribute(const char * const what) "Please switch to generic rfkill before year 2010"); } +/* sysfs enable ------------------------------------------------ */ +static ssize_t tpacpi_rfk_sysfs_enable_show(const enum tpacpi_rfk_id id, + struct device_attribute *attr, + char *buf) +{ + int status; + + printk_deprecated_rfkill_attribute(attr->attr.name); + + /* This is in the ABI... */ + if (tpacpi_rfk_check_hwblock_state()) { + status = TPACPI_RFK_RADIO_OFF; + } else { + status = tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]); + if (status < 0) + return status; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", + (status == TPACPI_RFK_RADIO_ON) ? 1 : 0); +} + +static ssize_t tpacpi_rfk_sysfs_enable_store(const enum tpacpi_rfk_id id, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long t; + int res; + + printk_deprecated_rfkill_attribute(attr->attr.name); + + if (parse_strtoul(buf, 1, &t)) + return -EINVAL; + + tpacpi_disclose_usertask(attr->attr.name, "set to %ld\n", t); + + /* This is in the ABI... */ + if (tpacpi_rfk_check_hwblock_state() && !!t) + return -EPERM; + + res = tpacpi_rfkill_switches[id]->ops->set_status((!!t) ? + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF); + tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]); + + return (res < 0) ? res : count; +} + +/* procfs -------------------------------------------------------------- */ +static int tpacpi_rfk_procfs_read(const enum tpacpi_rfk_id id, char *p) +{ + int len = 0; + + if (id >= TPACPI_RFK_SW_MAX) + len += sprintf(p + len, "status:\t\tnot supported\n"); + else { + int status; + + /* This is in the ABI... */ + if (tpacpi_rfk_check_hwblock_state()) { + status = TPACPI_RFK_RADIO_OFF; + } else { + status = tpacpi_rfk_update_swstate( + tpacpi_rfkill_switches[id]); + if (status < 0) + return status; + } + + len += sprintf(p + len, "status:\t\t%s\n", + (status == TPACPI_RFK_RADIO_ON) ? + "enabled" : "disabled"); + len += sprintf(p + len, "commands:\tenable, disable\n"); + } + + return len; +} + +static int tpacpi_rfk_procfs_write(const enum tpacpi_rfk_id id, char *buf) +{ + char *cmd; + int status = -1; + int res = 0; + + if (id >= TPACPI_RFK_SW_MAX) + return -ENODEV; + + while ((cmd = next_cmd(&buf))) { + if (strlencmp(cmd, "enable") == 0) + status = TPACPI_RFK_RADIO_ON; + else if (strlencmp(cmd, "disable") == 0) + status = TPACPI_RFK_RADIO_OFF; + else + return -EINVAL; + } + + if (status != -1) { + tpacpi_disclose_usertask("procfs", "attempt to %s %s\n", + (status == TPACPI_RFK_RADIO_ON) ? + "enable" : "disable", + tpacpi_rfkill_names[id]); + res = (tpacpi_rfkill_switches[id]->ops->set_status)(status); + tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]); + } + + return res; +} + /************************************************************************* * thinkpad-acpi driver attributes */ @@ -1127,8 +1396,6 @@ static DRIVER_ATTR(version, S_IRUGO, #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES -static void tpacpi_send_radiosw_update(void); - /* wlsw_emulstate ------------------------------------------------------ */ static ssize_t tpacpi_driver_wlsw_emulstate_show(struct device_driver *drv, char *buf) @@ -1144,11 +1411,10 @@ static ssize_t tpacpi_driver_wlsw_emulstate_store(struct device_driver *drv, if (parse_strtoul(buf, 1, &t)) return -EINVAL; - if (tpacpi_wlsw_emulstate != t) { - tpacpi_wlsw_emulstate = !!t; - tpacpi_send_radiosw_update(); - } else + if (tpacpi_wlsw_emulstate != !!t) { tpacpi_wlsw_emulstate = !!t; + tpacpi_rfk_update_hwblock_state(!t); /* negative logic */ + } return count; } @@ -1463,17 +1729,23 @@ static struct attribute_set *hotkey_dev_attributes; /* HKEY.MHKG() return bits */ #define TP_HOTKEY_TABLET_MASK (1 << 3) -static int hotkey_get_wlsw(int *status) +static int hotkey_get_wlsw(void) { + int status; + + if (!tp_features.hotkey_wlsw) + return -ENODEV; + #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES - if (dbg_wlswemul) { - *status = !!tpacpi_wlsw_emulstate; - return 0; - } + if (dbg_wlswemul) + return (tpacpi_wlsw_emulstate) ? + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; #endif - if (!acpi_evalf(hkey_handle, status, "WLSW", "d")) + + if (!acpi_evalf(hkey_handle, &status, "WLSW", "d")) return -EIO; - return 0; + + return (status) ? TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; } static int hotkey_get_tablet_mode(int *status) @@ -2107,12 +2379,16 @@ static ssize_t hotkey_radio_sw_show(struct device *dev, struct device_attribute *attr, char *buf) { - int res, s; - res = hotkey_get_wlsw(&s); + int res; + res = hotkey_get_wlsw(); if (res < 0) return res; - return snprintf(buf, PAGE_SIZE, "%d\n", !!s); + /* Opportunistic update */ + tpacpi_rfk_update_hwblock_state((res == TPACPI_RFK_RADIO_OFF)); + + return snprintf(buf, PAGE_SIZE, "%d\n", + (res == TPACPI_RFK_RADIO_OFF) ? 0 : 1); } static struct device_attribute dev_attr_hotkey_radio_sw = @@ -2223,30 +2499,52 @@ static struct attribute *hotkey_mask_attributes[] __initdata = { &dev_attr_hotkey_wakeup_hotunplug_complete.attr, }; -static void bluetooth_update_rfk(void); -static void wan_update_rfk(void); -static void uwb_update_rfk(void); +/* + * Sync both the hw and sw blocking state of all switches + */ static void tpacpi_send_radiosw_update(void) { int wlsw; - /* Sync these BEFORE sending any rfkill events */ - if (tp_features.bluetooth) - bluetooth_update_rfk(); - if (tp_features.wan) - wan_update_rfk(); - if (tp_features.uwb) - uwb_update_rfk(); + /* + * We must sync all rfkill controllers *before* issuing any + * rfkill input events, or we will race the rfkill core input + * handler. + * + * tpacpi_inputdev_send_mutex works as a syncronization point + * for the above. + * + * We optimize to avoid numerous calls to hotkey_get_wlsw. + */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) { + wlsw = hotkey_get_wlsw(); + + /* Sync hw blocking state first if it is hw-blocked */ + if (wlsw == TPACPI_RFK_RADIO_OFF) + tpacpi_rfk_update_hwblock_state(true); + + /* Sync sw blocking state */ + tpacpi_rfk_update_swstate_all(); + + /* Sync hw blocking state last if it is hw-unblocked */ + if (wlsw == TPACPI_RFK_RADIO_ON) + tpacpi_rfk_update_hwblock_state(false); + + /* Issue rfkill input event for WLSW switch */ + if (!(wlsw < 0)) { mutex_lock(&tpacpi_inputdev_send_mutex); input_report_switch(tpacpi_inputdev, - SW_RFKILL_ALL, !!wlsw); + SW_RFKILL_ALL, (wlsw > 0)); input_sync(tpacpi_inputdev); mutex_unlock(&tpacpi_inputdev_send_mutex); } + + /* + * this can be unconditional, as we will poll state again + * if userspace uses the notify to read data + */ hotkey_radio_sw_notify_change(); } @@ -3056,8 +3354,6 @@ enum { #define TPACPI_RFK_BLUETOOTH_SW_NAME "tpacpi_bluetooth_sw" -static struct rfkill *tpacpi_bluetooth_rfkill; - static void bluetooth_suspend(pm_message_t state) { /* Try to make sure radio will resume powered off */ @@ -3067,83 +3363,47 @@ static void bluetooth_suspend(pm_message_t state) "bluetooth power down on resume request failed\n"); } -static int bluetooth_get_radiosw(void) +static int bluetooth_get_status(void) { int status; - if (!tp_features.bluetooth) - return -ENODEV; - - /* WLSW overrides bluetooth in firmware/hardware, reflect that */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) - return RFKILL_STATE_HARD_BLOCKED; - #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_bluetoothemul) return (tpacpi_bluetooth_emulstate) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; #endif if (!acpi_evalf(hkey_handle, &status, "GBDC", "d")) return -EIO; return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; } -static void bluetooth_update_rfk(void) +static int bluetooth_set_status(enum tpacpi_rfkill_state state) { int status; - if (!tpacpi_bluetooth_rfkill) - return; - - status = bluetooth_get_radiosw(); - if (status < 0) - return; - rfkill_force_state(tpacpi_bluetooth_rfkill, status); - vdbg_printk(TPACPI_DBG_RFKILL, - "forced rfkill state to %d\n", - status); -} - -static int bluetooth_set_radiosw(int radio_on, int update_rfk) -{ - int status; - - if (!tp_features.bluetooth) - return -ENODEV; - - /* WLSW overrides bluetooth in firmware/hardware, but there is no - * reason to risk weird behaviour. */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status - && radio_on) - return -EPERM; - - vdbg_printk(TPACPI_DBG_RFKILL, - "will %s bluetooth\n", radio_on ? "enable" : "disable"); + "will attempt to %s bluetooth\n", + (state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable"); #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_bluetoothemul) { - tpacpi_bluetooth_emulstate = !!radio_on; - if (update_rfk) - bluetooth_update_rfk(); + tpacpi_bluetooth_emulstate = (state == TPACPI_RFK_RADIO_ON); return 0; } #endif /* We make sure to keep TP_ACPI_BLUETOOTH_RESUMECTRL off */ - if (radio_on) + if (state == TPACPI_RFK_RADIO_ON) status = TP_ACPI_BLUETOOTH_RADIOSSW; else status = 0; + if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status)) return -EIO; - if (update_rfk) - bluetooth_update_rfk(); - return 0; } @@ -3152,35 +3412,16 @@ static ssize_t bluetooth_enable_show(struct device *dev, struct device_attribute *attr, char *buf) { - int status; - - printk_deprecated_rfkill_attribute("bluetooth_enable"); - - status = bluetooth_get_radiosw(); - if (status < 0) - return status; - - return snprintf(buf, PAGE_SIZE, "%d\n", - (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0); + return tpacpi_rfk_sysfs_enable_show(TPACPI_RFK_BLUETOOTH_SW_ID, + attr, buf); } static ssize_t bluetooth_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long t; - int res; - - printk_deprecated_rfkill_attribute("bluetooth_enable"); - - if (parse_strtoul(buf, 1, &t)) - return -EINVAL; - - tpacpi_disclose_usertask("bluetooth_enable", "set to %ld\n", t); - - res = bluetooth_set_radiosw(t, 1); - - return (res) ? res : count; + return tpacpi_rfk_sysfs_enable_store(TPACPI_RFK_BLUETOOTH_SW_ID, + attr, buf, count); } static struct device_attribute dev_attr_bluetooth_enable = @@ -3198,23 +3439,10 @@ static const struct attribute_group bluetooth_attr_group = { .attrs = bluetooth_attributes, }; -static int tpacpi_bluetooth_rfk_get(void *data, enum rfkill_state *state) -{ - int bts = bluetooth_get_radiosw(); - - if (bts < 0) - return bts; - - *state = bts; - return 0; -} - -static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state) -{ - dbg_printk(TPACPI_DBG_RFKILL, - "request to change radio state to %d\n", state); - return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); -} +static const struct tpacpi_rfk_ops bluetooth_tprfk_ops = { + .get_status = bluetooth_get_status, + .set_status = bluetooth_set_status, +}; static void bluetooth_shutdown(void) { @@ -3230,13 +3458,12 @@ static void bluetooth_shutdown(void) static void bluetooth_exit(void) { - bluetooth_shutdown(); - - if (tpacpi_bluetooth_rfkill) - rfkill_unregister(tpacpi_bluetooth_rfkill); - sysfs_remove_group(&tpacpi_pdev->dev.kobj, &bluetooth_attr_group); + + tpacpi_destroy_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID); + + bluetooth_shutdown(); } static int __init bluetooth_init(struct ibm_init_struct *iibm) @@ -3277,20 +3504,18 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) if (!tp_features.bluetooth) return 1; - res = sysfs_create_group(&tpacpi_pdev->dev.kobj, - &bluetooth_attr_group); - if (res) - return res; - res = tpacpi_new_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID, - &tpacpi_bluetooth_rfkill, + &bluetooth_tprfk_ops, RFKILL_TYPE_BLUETOOTH, TPACPI_RFK_BLUETOOTH_SW_NAME, - true, - tpacpi_bluetooth_rfk_set, - tpacpi_bluetooth_rfk_get); + true); + if (res) + return res; + + res = sysfs_create_group(&tpacpi_pdev->dev.kobj, + &bluetooth_attr_group); if (res) { - bluetooth_exit(); + tpacpi_destroy_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID); return res; } @@ -3300,46 +3525,12 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) /* procfs -------------------------------------------------------------- */ static int bluetooth_read(char *p) { - int len = 0; - int status = bluetooth_get_radiosw(); - - if (!tp_features.bluetooth) - len += sprintf(p + len, "status:\t\tnot supported\n"); - else { - len += sprintf(p + len, "status:\t\t%s\n", - (status == RFKILL_STATE_UNBLOCKED) ? - "enabled" : "disabled"); - len += sprintf(p + len, "commands:\tenable, disable\n"); - } - - return len; + return tpacpi_rfk_procfs_read(TPACPI_RFK_BLUETOOTH_SW_ID, p); } static int bluetooth_write(char *buf) { - char *cmd; - int state = -1; - - if (!tp_features.bluetooth) - return -ENODEV; - - while ((cmd = next_cmd(&buf))) { - if (strlencmp(cmd, "enable") == 0) { - state = 1; - } else if (strlencmp(cmd, "disable") == 0) { - state = 0; - } else - return -EINVAL; - } - - if (state != -1) { - tpacpi_disclose_usertask("procfs bluetooth", - "attempt to %s\n", - state ? "enable" : "disable"); - bluetooth_set_radiosw(state, 1); - } - - return 0; + return tpacpi_rfk_procfs_write(TPACPI_RFK_BLUETOOTH_SW_ID, buf); } static struct ibm_struct bluetooth_driver_data = { @@ -3365,8 +3556,6 @@ enum { #define TPACPI_RFK_WWAN_SW_NAME "tpacpi_wwan_sw" -static struct rfkill *tpacpi_wan_rfkill; - static void wan_suspend(pm_message_t state) { /* Try to make sure radio will resume powered off */ @@ -3376,83 +3565,47 @@ static void wan_suspend(pm_message_t state) "WWAN power down on resume request failed\n"); } -static int wan_get_radiosw(void) +static int wan_get_status(void) { int status; - if (!tp_features.wan) - return -ENODEV; - - /* WLSW overrides WWAN in firmware/hardware, reflect that */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) - return RFKILL_STATE_HARD_BLOCKED; - #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_wwanemul) return (tpacpi_wwan_emulstate) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; #endif if (!acpi_evalf(hkey_handle, &status, "GWAN", "d")) return -EIO; return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; -} - -static void wan_update_rfk(void) -{ - int status; - - if (!tpacpi_wan_rfkill) - return; - - status = wan_get_radiosw(); - if (status < 0) - return; - rfkill_force_state(tpacpi_wan_rfkill, status); - - vdbg_printk(TPACPI_DBG_RFKILL, - "forced rfkill state to %d\n", - status); + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; } -static int wan_set_radiosw(int radio_on, int update_rfk) +static int wan_set_status(enum tpacpi_rfkill_state state) { int status; - if (!tp_features.wan) - return -ENODEV; - - /* WLSW overrides bluetooth in firmware/hardware, but there is no - * reason to risk weird behaviour. */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status - && radio_on) - return -EPERM; - vdbg_printk(TPACPI_DBG_RFKILL, - "will %s WWAN\n", radio_on ? "enable" : "disable"); + "will attempt to %s wwan\n", + (state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable"); #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_wwanemul) { - tpacpi_wwan_emulstate = !!radio_on; - if (update_rfk) - wan_update_rfk(); + tpacpi_wwan_emulstate = (state == TPACPI_RFK_RADIO_ON); return 0; } #endif /* We make sure to keep TP_ACPI_WANCARD_RESUMECTRL off */ - if (radio_on) + if (state == TPACPI_RFK_RADIO_ON) status = TP_ACPI_WANCARD_RADIOSSW; else status = 0; + if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status)) return -EIO; - if (update_rfk) - wan_update_rfk(); - return 0; } @@ -3461,35 +3614,16 @@ static ssize_t wan_enable_show(struct device *dev, struct device_attribute *attr, char *buf) { - int status; - - printk_deprecated_rfkill_attribute("wwan_enable"); - - status = wan_get_radiosw(); - if (status < 0) - return status; - - return snprintf(buf, PAGE_SIZE, "%d\n", - (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0); + return tpacpi_rfk_sysfs_enable_show(TPACPI_RFK_WWAN_SW_ID, + attr, buf); } static ssize_t wan_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long t; - int res; - - printk_deprecated_rfkill_attribute("wwan_enable"); - - if (parse_strtoul(buf, 1, &t)) - return -EINVAL; - - tpacpi_disclose_usertask("wwan_enable", "set to %ld\n", t); - - res = wan_set_radiosw(t, 1); - - return (res) ? res : count; + return tpacpi_rfk_sysfs_enable_store(TPACPI_RFK_WWAN_SW_ID, + attr, buf, count); } static struct device_attribute dev_attr_wan_enable = @@ -3507,23 +3641,10 @@ static const struct attribute_group wan_attr_group = { .attrs = wan_attributes, }; -static int tpacpi_wan_rfk_get(void *data, enum rfkill_state *state) -{ - int wans = wan_get_radiosw(); - - if (wans < 0) - return wans; - - *state = wans; - return 0; -} - -static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state) -{ - dbg_printk(TPACPI_DBG_RFKILL, - "request to change radio state to %d\n", state); - return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); -} +static const struct tpacpi_rfk_ops wan_tprfk_ops = { + .get_status = wan_get_status, + .set_status = wan_set_status, +}; static void wan_shutdown(void) { @@ -3539,13 +3660,12 @@ static void wan_shutdown(void) static void wan_exit(void) { - wan_shutdown(); - - if (tpacpi_wan_rfkill) - rfkill_unregister(tpacpi_wan_rfkill); - sysfs_remove_group(&tpacpi_pdev->dev.kobj, &wan_attr_group); + + tpacpi_destroy_rfkill(TPACPI_RFK_WWAN_SW_ID); + + wan_shutdown(); } static int __init wan_init(struct ibm_init_struct *iibm) @@ -3584,20 +3704,19 @@ static int __init wan_init(struct ibm_init_struct *iibm) if (!tp_features.wan) return 1; - res = sysfs_create_group(&tpacpi_pdev->dev.kobj, - &wan_attr_group); - if (res) - return res; - res = tpacpi_new_rfkill(TPACPI_RFK_WWAN_SW_ID, - &tpacpi_wan_rfkill, + &wan_tprfk_ops, RFKILL_TYPE_WWAN, TPACPI_RFK_WWAN_SW_NAME, - true, - tpacpi_wan_rfk_set, - tpacpi_wan_rfk_get); + true); + if (res) + return res; + + res = sysfs_create_group(&tpacpi_pdev->dev.kobj, + &wan_attr_group); + if (res) { - wan_exit(); + tpacpi_destroy_rfkill(TPACPI_RFK_WWAN_SW_ID); return res; } @@ -3607,48 +3726,12 @@ static int __init wan_init(struct ibm_init_struct *iibm) /* procfs -------------------------------------------------------------- */ static int wan_read(char *p) { - int len = 0; - int status = wan_get_radiosw(); - - tpacpi_disclose_usertask("procfs wan", "read"); - - if (!tp_features.wan) - len += sprintf(p + len, "status:\t\tnot supported\n"); - else { - len += sprintf(p + len, "status:\t\t%s\n", - (status == RFKILL_STATE_UNBLOCKED) ? - "enabled" : "disabled"); - len += sprintf(p + len, "commands:\tenable, disable\n"); - } - - return len; + return tpacpi_rfk_procfs_read(TPACPI_RFK_WWAN_SW_ID, p); } static int wan_write(char *buf) { - char *cmd; - int state = -1; - - if (!tp_features.wan) - return -ENODEV; - - while ((cmd = next_cmd(&buf))) { - if (strlencmp(cmd, "enable") == 0) { - state = 1; - } else if (strlencmp(cmd, "disable") == 0) { - state = 0; - } else - return -EINVAL; - } - - if (state != -1) { - tpacpi_disclose_usertask("procfs wan", - "attempt to %s\n", - state ? "enable" : "disable"); - wan_set_radiosw(state, 1); - } - - return 0; + return tpacpi_rfk_procfs_write(TPACPI_RFK_WWAN_SW_ID, buf); } static struct ibm_struct wan_driver_data = { @@ -3672,108 +3755,59 @@ enum { #define TPACPI_RFK_UWB_SW_NAME "tpacpi_uwb_sw" -static struct rfkill *tpacpi_uwb_rfkill; - -static int uwb_get_radiosw(void) +static int uwb_get_status(void) { int status; - if (!tp_features.uwb) - return -ENODEV; - - /* WLSW overrides UWB in firmware/hardware, reflect that */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) - return RFKILL_STATE_HARD_BLOCKED; - #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_uwbemul) return (tpacpi_uwb_emulstate) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; #endif if (!acpi_evalf(hkey_handle, &status, "GUWB", "d")) return -EIO; return ((status & TP_ACPI_UWB_RADIOSSW) != 0) ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; + TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF; } -static void uwb_update_rfk(void) +static int uwb_set_status(enum tpacpi_rfkill_state state) { int status; - if (!tpacpi_uwb_rfkill) - return; - - status = uwb_get_radiosw(); - if (status < 0) - return; - rfkill_force_state(tpacpi_uwb_rfkill, status); - vdbg_printk(TPACPI_DBG_RFKILL, - "forced rfkill state to %d\n", - status); -} - -static int uwb_set_radiosw(int radio_on, int update_rfk) -{ - int status; - - if (!tp_features.uwb) - return -ENODEV; - - /* WLSW overrides UWB in firmware/hardware, but there is no - * reason to risk weird behaviour. */ - if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status - && radio_on) - return -EPERM; - - vdbg_printk(TPACPI_DBG_RFKILL, - "will %s UWB\n", radio_on ? "enable" : "disable"); + "will attempt to %s UWB\n", + (state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable"); #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_uwbemul) { - tpacpi_uwb_emulstate = !!radio_on; - if (update_rfk) - uwb_update_rfk(); + tpacpi_uwb_emulstate = (state == TPACPI_RFK_RADIO_ON); return 0; } #endif - status = (radio_on) ? TP_ACPI_UWB_RADIOSSW : 0; + if (state == TPACPI_RFK_RADIO_ON) + status = TP_ACPI_UWB_RADIOSSW; + else + status = 0; + if (!acpi_evalf(hkey_handle, NULL, "SUWB", "vd", status)) return -EIO; - if (update_rfk) - uwb_update_rfk(); - return 0; } /* --------------------------------------------------------------------- */ -static int tpacpi_uwb_rfk_get(void *data, enum rfkill_state *state) -{ - int uwbs = uwb_get_radiosw(); - - if (uwbs < 0) - return uwbs; - - *state = uwbs; - return 0; -} - -static int tpacpi_uwb_rfk_set(void *data, enum rfkill_state state) -{ - dbg_printk(TPACPI_DBG_RFKILL, - "request to change radio state to %d\n", state); - return uwb_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); -} +static const struct tpacpi_rfk_ops uwb_tprfk_ops = { + .get_status = uwb_get_status, + .set_status = uwb_set_status, +}; static void uwb_exit(void) { - if (tpacpi_uwb_rfkill) - rfkill_unregister(tpacpi_uwb_rfkill); + tpacpi_destroy_rfkill(TPACPI_RFK_UWB_SW_ID); } static int __init uwb_init(struct ibm_init_struct *iibm) @@ -3813,13 +3847,10 @@ static int __init uwb_init(struct ibm_init_struct *iibm) return 1; res = tpacpi_new_rfkill(TPACPI_RFK_UWB_SW_ID, - &tpacpi_uwb_rfkill, + &uwb_tprfk_ops, RFKILL_TYPE_UWB, TPACPI_RFK_UWB_SW_NAME, - false, - tpacpi_uwb_rfk_set, - tpacpi_uwb_rfk_get); - + false); return res; } diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 4345089f517..81d31ea507d 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -45,7 +45,6 @@ #include #include #include -#include #include @@ -250,21 +249,15 @@ static acpi_status hci_read2(u32 reg, u32 *out1, u32 *out2, u32 *result) struct toshiba_acpi_dev { struct platform_device *p_dev; - struct rfkill *rfk_dev; - struct input_polled_dev *poll_dev; + struct rfkill *bt_rfk; const char *bt_name; - const char *rfk_name; - - bool last_rfk_state; struct mutex mutex; }; static struct toshiba_acpi_dev toshiba_acpi = { .bt_name = "Toshiba Bluetooth", - .rfk_name = "Toshiba RFKill Switch", - .last_rfk_state = false, }; /* Bluetooth rfkill handlers */ @@ -283,21 +276,6 @@ static u32 hci_get_bt_present(bool *present) return hci_result; } -static u32 hci_get_bt_on(bool *on) -{ - u32 hci_result; - u32 value, value2; - - value = 0; - value2 = 0x0001; - hci_read2(HCI_WIRELESS, &value, &value2, &hci_result); - if (hci_result == HCI_SUCCESS) - *on = (value & HCI_WIRELESS_BT_POWER) && - (value & HCI_WIRELESS_BT_ATTACH); - - return hci_result; -} - static u32 hci_get_radio_state(bool *radio_state) { u32 hci_result; @@ -311,70 +289,67 @@ static u32 hci_get_radio_state(bool *radio_state) return hci_result; } -static int bt_rfkill_toggle_radio(void *data, enum rfkill_state state) +static int bt_rfkill_set_block(void *data, bool blocked) { + struct toshiba_acpi_dev *dev = data; u32 result1, result2; u32 value; + int err; bool radio_state; - struct toshiba_acpi_dev *dev = data; - value = (state == RFKILL_STATE_UNBLOCKED); + value = (blocked == false); - if (hci_get_radio_state(&radio_state) != HCI_SUCCESS) - return -EFAULT; + mutex_lock(&dev->mutex); + if (hci_get_radio_state(&radio_state) != HCI_SUCCESS) { + err = -EBUSY; + goto out; + } - switch (state) { - case RFKILL_STATE_UNBLOCKED: - if (!radio_state) - return -EPERM; - break; - case RFKILL_STATE_SOFT_BLOCKED: - break; - default: - return -EINVAL; + if (!radio_state) { + err = 0; + goto out; } - mutex_lock(&dev->mutex); hci_write2(HCI_WIRELESS, value, HCI_WIRELESS_BT_POWER, &result1); hci_write2(HCI_WIRELESS, value, HCI_WIRELESS_BT_ATTACH, &result2); - mutex_unlock(&dev->mutex); if (result1 != HCI_SUCCESS || result2 != HCI_SUCCESS) - return -EFAULT; - - return 0; + err = -EBUSY; + else + err = 0; + out: + mutex_unlock(&dev->mutex); + return err; } -static void bt_poll_rfkill(struct input_polled_dev *poll_dev) +static void bt_rfkill_poll(struct rfkill *rfkill, void *data) { - bool state_changed; bool new_rfk_state; bool value; u32 hci_result; - struct toshiba_acpi_dev *dev = poll_dev->private; + struct toshiba_acpi_dev *dev = data; + + mutex_lock(&dev->mutex); hci_result = hci_get_radio_state(&value); - if (hci_result != HCI_SUCCESS) - return; /* Can't do anything useful */ + if (hci_result != HCI_SUCCESS) { + /* Can't do anything useful */ + mutex_unlock(&dev->mutex); + } new_rfk_state = value; - mutex_lock(&dev->mutex); - state_changed = new_rfk_state != dev->last_rfk_state; - dev->last_rfk_state = new_rfk_state; mutex_unlock(&dev->mutex); - if (unlikely(state_changed)) { - rfkill_force_state(dev->rfk_dev, - new_rfk_state ? - RFKILL_STATE_SOFT_BLOCKED : - RFKILL_STATE_HARD_BLOCKED); - input_report_switch(poll_dev->input, SW_RFKILL_ALL, - new_rfk_state); - input_sync(poll_dev->input); - } + if (rfkill_set_hw_state(rfkill, !new_rfk_state)) + bt_rfkill_set_block(data, true); } +static const struct rfkill_ops toshiba_rfk_ops = { + .set_block = bt_rfkill_set_block, + .poll = bt_rfkill_poll, +}; + static struct proc_dir_entry *toshiba_proc_dir /*= 0*/ ; static struct backlight_device *toshiba_backlight_device; static int force_fan; @@ -702,14 +677,11 @@ static struct backlight_ops toshiba_backlight_data = { static void toshiba_acpi_exit(void) { - if (toshiba_acpi.poll_dev) { - input_unregister_polled_device(toshiba_acpi.poll_dev); - input_free_polled_device(toshiba_acpi.poll_dev); + if (toshiba_acpi.bt_rfk) { + rfkill_unregister(toshiba_acpi.bt_rfk); + rfkill_destroy(toshiba_acpi.bt_rfk); } - if (toshiba_acpi.rfk_dev) - rfkill_unregister(toshiba_acpi.rfk_dev); - if (toshiba_backlight_device) backlight_device_unregister(toshiba_backlight_device); @@ -728,8 +700,6 @@ static int __init toshiba_acpi_init(void) acpi_status status = AE_OK; u32 hci_result; bool bt_present; - bool bt_on; - bool radio_on; int ret = 0; if (acpi_disabled) @@ -793,60 +763,21 @@ static int __init toshiba_acpi_init(void) /* Register rfkill switch for Bluetooth */ if (hci_get_bt_present(&bt_present) == HCI_SUCCESS && bt_present) { - toshiba_acpi.rfk_dev = rfkill_allocate(&toshiba_acpi.p_dev->dev, - RFKILL_TYPE_BLUETOOTH); - if (!toshiba_acpi.rfk_dev) { + toshiba_acpi.bt_rfk = rfkill_alloc(toshiba_acpi.bt_name, + &toshiba_acpi.p_dev->dev, + RFKILL_TYPE_BLUETOOTH, + &toshiba_rfk_ops, + &toshiba_acpi); + if (!toshiba_acpi.bt_rfk) { printk(MY_ERR "unable to allocate rfkill device\n"); toshiba_acpi_exit(); return -ENOMEM; } - toshiba_acpi.rfk_dev->name = toshiba_acpi.bt_name; - toshiba_acpi.rfk_dev->toggle_radio = bt_rfkill_toggle_radio; - toshiba_acpi.rfk_dev->data = &toshiba_acpi; - - if (hci_get_bt_on(&bt_on) == HCI_SUCCESS && bt_on) { - toshiba_acpi.rfk_dev->state = RFKILL_STATE_UNBLOCKED; - } else if (hci_get_radio_state(&radio_on) == HCI_SUCCESS && - radio_on) { - toshiba_acpi.rfk_dev->state = RFKILL_STATE_SOFT_BLOCKED; - } else { - toshiba_acpi.rfk_dev->state = RFKILL_STATE_HARD_BLOCKED; - } - - ret = rfkill_register(toshiba_acpi.rfk_dev); + ret = rfkill_register(toshiba_acpi.bt_rfk); if (ret) { printk(MY_ERR "unable to register rfkill device\n"); - toshiba_acpi_exit(); - return -ENOMEM; - } - - /* Register input device for kill switch */ - toshiba_acpi.poll_dev = input_allocate_polled_device(); - if (!toshiba_acpi.poll_dev) { - printk(MY_ERR - "unable to allocate kill-switch input device\n"); - toshiba_acpi_exit(); - return -ENOMEM; - } - toshiba_acpi.poll_dev->private = &toshiba_acpi; - toshiba_acpi.poll_dev->poll = bt_poll_rfkill; - toshiba_acpi.poll_dev->poll_interval = 1000; /* msecs */ - - toshiba_acpi.poll_dev->input->name = toshiba_acpi.rfk_name; - toshiba_acpi.poll_dev->input->id.bustype = BUS_HOST; - /* Toshiba USB ID */ - toshiba_acpi.poll_dev->input->id.vendor = 0x0930; - set_bit(EV_SW, toshiba_acpi.poll_dev->input->evbit); - set_bit(SW_RFKILL_ALL, toshiba_acpi.poll_dev->input->swbit); - input_report_switch(toshiba_acpi.poll_dev->input, - SW_RFKILL_ALL, TRUE); - input_sync(toshiba_acpi.poll_dev->input); - - ret = input_register_polled_device(toshiba_acpi.poll_dev); - if (ret) { - printk(MY_ERR - "unable to register kill-switch input device\n"); + rfkill_destroy(toshiba_acpi.bt_rfk); toshiba_acpi_exit(); return ret; } diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f0eaa397ef..7e09c5c1ed0 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -311,6 +311,7 @@ unifdef-y += ptrace.h unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h +unifdef-y += rfkill.h unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index de18ef227e0..090852c8de7 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -4,6 +4,7 @@ /* * Copyright (C) 2006 - 2007 Ivo van Doorn * Copyright (C) 2007 Dmitry Torokhov + * Copyright 2009 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,6 +22,24 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* define userspace visible states */ +#define RFKILL_STATE_SOFT_BLOCKED 0 +#define RFKILL_STATE_UNBLOCKED 1 +#define RFKILL_STATE_HARD_BLOCKED 2 + +/* and that's all userspace gets */ +#ifdef __KERNEL__ +/* don't allow anyone to use these in the kernel */ +enum rfkill_user_states { + RFKILL_USER_STATE_SOFT_BLOCKED = RFKILL_STATE_SOFT_BLOCKED, + RFKILL_USER_STATE_UNBLOCKED = RFKILL_STATE_UNBLOCKED, + RFKILL_USER_STATE_HARD_BLOCKED = RFKILL_STATE_HARD_BLOCKED, +}; +#undef RFKILL_STATE_SOFT_BLOCKED +#undef RFKILL_STATE_UNBLOCKED +#undef RFKILL_STATE_HARD_BLOCKED + #include #include #include @@ -30,109 +49,267 @@ /** * enum rfkill_type - type of rfkill switch. - * RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. - * RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. - * RFKILL_TYPE_UWB: switch is on a ultra wideband device. - * RFKILL_TYPE_WIMAX: switch is on a WiMAX device. - * RFKILL_TYPE_WWAN: switch is on a wireless WAN device. + * + * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. + * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. + * @RFKILL_TYPE_UWB: switch is on a ultra wideband device. + * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device. + * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device. + * @NUM_RFKILL_TYPES: number of defined rfkill types */ enum rfkill_type { - RFKILL_TYPE_WLAN , + RFKILL_TYPE_WLAN, RFKILL_TYPE_BLUETOOTH, RFKILL_TYPE_UWB, RFKILL_TYPE_WIMAX, RFKILL_TYPE_WWAN, - RFKILL_TYPE_MAX, + NUM_RFKILL_TYPES, }; -enum rfkill_state { - RFKILL_STATE_SOFT_BLOCKED = 0, /* Radio output blocked */ - RFKILL_STATE_UNBLOCKED = 1, /* Radio output allowed */ - RFKILL_STATE_HARD_BLOCKED = 2, /* Output blocked, non-overrideable */ - RFKILL_STATE_MAX, /* marker for last valid state */ +/* this is opaque */ +struct rfkill; + +/** + * struct rfkill_ops - rfkill driver methods + * + * @poll: poll the rfkill block state(s) -- only assign this method + * when you need polling. When called, simply call one of the + * rfkill_set{,_hw,_sw}_state family of functions. If the hw + * is getting unblocked you need to take into account the return + * value of those functions to make sure the software block is + * properly used. + * @query: query the rfkill block state(s) and call exactly one of the + * rfkill_set{,_hw,_sw}_state family of functions. Assign this + * method if input events can cause hardware state changes to make + * the rfkill core query your driver before setting a requested + * block. + * @set_block: turn the transmitter on (blocked == false) or off + * (blocked == true) -- this is called only while the transmitter + * is not hard-blocked, but note that the core's view of whether + * the transmitter is hard-blocked might differ from your driver's + * view due to race conditions, so it is possible that it is still + * called at the same time as you are calling rfkill_set_hw_state(). + * This callback must be assigned. + */ +struct rfkill_ops { + void (*poll)(struct rfkill *rfkill, void *data); + void (*query)(struct rfkill *rfkill, void *data); + int (*set_block)(void *data, bool blocked); }; +#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) /** - * struct rfkill - rfkill control structure. - * @name: Name of the switch. - * @type: Radio type which the button controls, the value stored - * here should be a value from enum rfkill_type. - * @state: State of the switch, "UNBLOCKED" means radio can operate. - * @mutex: Guards switch state transitions. It serializes callbacks - * and also protects the state. - * @data: Pointer to the RF button drivers private data which will be - * passed along when toggling radio state. - * @toggle_radio(): Mandatory handler to control state of the radio. - * only RFKILL_STATE_SOFT_BLOCKED and RFKILL_STATE_UNBLOCKED are - * valid parameters. - * @get_state(): handler to read current radio state from hardware, - * may be called from atomic context, should return 0 on success. - * Either this handler OR judicious use of rfkill_force_state() is - * MANDATORY for any driver capable of RFKILL_STATE_HARD_BLOCKED. - * @led_trigger: A LED trigger for this button's LED. - * @dev: Device structure integrating the switch into device tree. - * @node: Used to place switch into list of all switches known to the - * the system. - * - * This structure represents a RF switch located on a network device. + * rfkill_alloc - allocate rfkill structure + * @name: name of the struct -- the string is not copied internally + * @parent: device that has rf switch on it + * @type: type of the switch (RFKILL_TYPE_*) + * @ops: rfkill methods + * @ops_data: data passed to each method + * + * This function should be called by the transmitter driver to allocate an + * rfkill structure. Returns %NULL on failure. */ -struct rfkill { - const char *name; - enum rfkill_type type; - - /* the mutex serializes callbacks and also protects - * the state */ - struct mutex mutex; - enum rfkill_state state; - void *data; - int (*toggle_radio)(void *data, enum rfkill_state state); - int (*get_state)(void *data, enum rfkill_state *state); +struct rfkill * __must_check rfkill_alloc(const char *name, + struct device *parent, + const enum rfkill_type type, + const struct rfkill_ops *ops, + void *ops_data); -#ifdef CONFIG_RFKILL_LEDS - struct led_trigger led_trigger; -#endif +/** + * rfkill_register - Register a rfkill structure. + * @rfkill: rfkill structure to be registered + * + * This function should be called by the transmitter driver to register + * the rfkill structure needs to be registered. Before calling this function + * the driver needs to be ready to service method calls from rfkill. + */ +int __must_check rfkill_register(struct rfkill *rfkill); - struct device dev; - struct list_head node; - enum rfkill_state state_for_resume; -}; -#define to_rfkill(d) container_of(d, struct rfkill, dev) +/** + * rfkill_pause_polling(struct rfkill *rfkill) + * + * Pause polling -- say transmitter is off for other reasons. + * NOTE: not necessary for suspend/resume -- in that case the + * core stops polling anyway + */ +void rfkill_pause_polling(struct rfkill *rfkill); -struct rfkill * __must_check rfkill_allocate(struct device *parent, - enum rfkill_type type); -void rfkill_free(struct rfkill *rfkill); -int __must_check rfkill_register(struct rfkill *rfkill); +/** + * rfkill_resume_polling(struct rfkill *rfkill) + * + * Pause polling -- say transmitter is off for other reasons. + * NOTE: not necessary for suspend/resume -- in that case the + * core stops polling anyway + */ +void rfkill_resume_polling(struct rfkill *rfkill); + + +/** + * rfkill_unregister - Unregister a rfkill structure. + * @rfkill: rfkill structure to be unregistered + * + * This function should be called by the network driver during device + * teardown to destroy rfkill structure. Until it returns, the driver + * needs to be able to service method calls. + */ void rfkill_unregister(struct rfkill *rfkill); -int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state); -int rfkill_set_default(enum rfkill_type type, enum rfkill_state state); +/** + * rfkill_destroy - free rfkill structure + * @rfkill: rfkill structure to be destroyed + * + * Destroys the rfkill structure. + */ +void rfkill_destroy(struct rfkill *rfkill); + +/** + * rfkill_set_hw_state - Set the internal rfkill hardware block state + * @rfkill: pointer to the rfkill class to modify. + * @state: the current hardware block state to set + * + * rfkill drivers that get events when the hard-blocked state changes + * use this function to notify the rfkill core (and through that also + * userspace) of the current state -- they should also use this after + * resume if the state could have changed. + * + * You need not (but may) call this function if poll_state is assigned. + * + * This function can be called in any context, even from within rfkill + * callbacks. + * + * The function returns the combined block state (true if transmitter + * should be blocked) so that drivers need not keep track of the soft + * block state -- which they might not be able to. + */ +bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); + +/** + * rfkill_set_sw_state - Set the internal rfkill software block state + * @rfkill: pointer to the rfkill class to modify. + * @state: the current software block state to set + * + * rfkill drivers that get events when the soft-blocked state changes + * (yes, some platforms directly act on input but allow changing again) + * use this function to notify the rfkill core (and through that also + * userspace) of the current state -- they should also use this after + * resume if the state could have changed. + * + * This function can be called in any context, even from within rfkill + * callbacks. + * + * The function returns the combined block state (true if transmitter + * should be blocked). + */ +bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked); + +/** + * rfkill_set_states - Set the internal rfkill block states + * @rfkill: pointer to the rfkill class to modify. + * @sw: the current software block state to set + * @hw: the current hardware block state to set + * + * This function can be called in any context, even from within rfkill + * callbacks. + */ +void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); /** - * rfkill_state_complement - return complementar state - * @state: state to return the complement of + * rfkill_set_global_sw_state - set global sw block default + * @type: rfkill type to set default for + * @blocked: default to set * - * Returns RFKILL_STATE_SOFT_BLOCKED if @state is RFKILL_STATE_UNBLOCKED, - * returns RFKILL_STATE_UNBLOCKED otherwise. + * This function sets the global default -- use at boot if your platform has + * an rfkill switch. If not early enough this call may be ignored. + * + * XXX: instead of ignoring -- how about just updating all currently + * registered drivers? */ -static inline enum rfkill_state rfkill_state_complement(enum rfkill_state state) +void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked); +#else /* !RFKILL */ +static inline struct rfkill * __must_check +rfkill_alloc(const char *name, + struct device *parent, + const enum rfkill_type type, + const struct rfkill_ops *ops, + void *ops_data) +{ + return ERR_PTR(-ENODEV); +} + +static inline int __must_check rfkill_register(struct rfkill *rfkill) +{ + if (rfkill == ERR_PTR(-ENODEV)) + return 0; + return -EINVAL; +} + +static inline void rfkill_pause_polling(struct rfkill *rfkill) +{ +} + +static inline void rfkill_resume_polling(struct rfkill *rfkill) +{ +} + +static inline void rfkill_unregister(struct rfkill *rfkill) +{ +} + +static inline void rfkill_destroy(struct rfkill *rfkill) +{ +} + +static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) +{ + return blocked; +} + +static inline bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) +{ + return blocked; +} + +static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) +{ +} + +static inline void rfkill_set_global_sw_state(const enum rfkill_type type, + bool blocked) { - return (state == RFKILL_STATE_UNBLOCKED) ? - RFKILL_STATE_SOFT_BLOCKED : RFKILL_STATE_UNBLOCKED; } +#endif /* RFKILL || RFKILL_MODULE */ + +#ifdef CONFIG_RFKILL_LEDS /** - * rfkill_get_led_name - Get the LED trigger name for the button's LED. + * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED. * This function might return a NULL pointer if registering of the - * LED trigger failed. - * Use this as "default_trigger" for the LED. + * LED trigger failed. Use this as "default_trigger" for the LED. */ -static inline char *rfkill_get_led_name(struct rfkill *rfkill) -{ -#ifdef CONFIG_RFKILL_LEDS - return (char *)(rfkill->led_trigger.name); +const char *rfkill_get_led_trigger_name(struct rfkill *rfkill); + +/** + * rfkill_set_led_trigger_name -- set the LED trigger name + * @rfkill: rfkill struct + * @name: LED trigger name + * + * This function sets the LED trigger name of the radio LED + * trigger that rfkill creates. It is optional, but if called + * must be called before rfkill_register() to be effective. + */ +void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name); #else +static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) +{ return NULL; -#endif } +static inline void +rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) +{ +} +#endif + +#endif /* __KERNEL__ */ + #endif /* RFKILL_H */ diff --git a/include/net/wimax.h b/include/net/wimax.h index 6b3824edb39..2af7bf839f2 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -253,7 +253,6 @@ struct net_device; struct genl_info; struct wimax_dev; -struct input_dev; /** * struct wimax_dev - Generic WiMAX device @@ -293,8 +292,8 @@ struct input_dev; * See wimax_reset()'s documentation. * * @name: [fill] A way to identify this device. We need to register a - * name with many subsystems (input for RFKILL, workqueue - * creation, etc). We can't use the network device name as that + * name with many subsystems (rfkill, workqueue creation, etc). + * We can't use the network device name as that * might change and in some instances we don't know it yet (until * we don't call register_netdev()). So we generate an unique one * using the driver name and device bus id, place it here and use @@ -316,9 +315,6 @@ struct input_dev; * * @rfkill: [private] integration into the RF-Kill infrastructure. * - * @rfkill_input: [private] virtual input device to process the - * hardware RF Kill switches. - * * @rf_sw: [private] State of the software radio switch (OFF/ON) * * @rf_hw: [private] State of the hardware radio switch (OFF/ON) diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 7f807b30cfb..b47f72fae05 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -10,22 +10,15 @@ menuconfig RFKILL To compile this driver as a module, choose M here: the module will be called rfkill. -config RFKILL_INPUT - tristate "Input layer to RF switch connector" - depends on RFKILL && INPUT - help - Say Y here if you want kernel automatically toggle state - of RF switches on and off when user presses appropriate - button or a key on the keyboard. Without this module you - need a some kind of userspace application to control - state of the switches. - - To compile this driver as a module, choose M here: the - module will be called rfkill-input. - # LED trigger support config RFKILL_LEDS bool - depends on RFKILL && LEDS_TRIGGERS + depends on RFKILL + depends on LEDS_TRIGGERS = y || RFKILL = LEDS_TRIGGERS default y +config RFKILL_INPUT + bool + depends on RFKILL + depends on INPUT = y || RFKILL = INPUT + default y diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index b38c430be05..66210535269 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -2,5 +2,6 @@ # Makefile for the RF switch subsystem. # -obj-$(CONFIG_RFKILL) += rfkill.o -obj-$(CONFIG_RFKILL_INPUT) += rfkill-input.o +rfkill-y += core.o +rfkill-$(CONFIG_RFKILL_INPUT) += input.o +obj-$(CONFIG_RFKILL) += rfkill.o diff --git a/net/rfkill/core.c b/net/rfkill/core.c new file mode 100644 index 00000000000..30a6f8d819b --- /dev/null +++ b/net/rfkill/core.c @@ -0,0 +1,896 @@ +/* + * Copyright (C) 2006 - 2007 Ivo van Doorn + * Copyright (C) 2007 Dmitry Torokhov + * Copyright 2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rfkill.h" + +#define POLL_INTERVAL (5 * HZ) + +#define RFKILL_BLOCK_HW BIT(0) +#define RFKILL_BLOCK_SW BIT(1) +#define RFKILL_BLOCK_SW_PREV BIT(2) +#define RFKILL_BLOCK_ANY (RFKILL_BLOCK_HW |\ + RFKILL_BLOCK_SW |\ + RFKILL_BLOCK_SW_PREV) +#define RFKILL_BLOCK_SW_SETCALL BIT(31) + +struct rfkill { + spinlock_t lock; + + const char *name; + enum rfkill_type type; + + unsigned long state; + + bool registered; + bool suspended; + + const struct rfkill_ops *ops; + void *data; + +#ifdef CONFIG_RFKILL_LEDS + struct led_trigger led_trigger; + const char *ledtrigname; +#endif + + struct device dev; + struct list_head node; + + struct delayed_work poll_work; + struct work_struct uevent_work; + struct work_struct sync_work; +}; +#define to_rfkill(d) container_of(d, struct rfkill, dev) + + + +MODULE_AUTHOR("Ivo van Doorn "); +MODULE_AUTHOR("Johannes Berg "); +MODULE_DESCRIPTION("RF switch support"); +MODULE_LICENSE("GPL"); + + +/* + * The locking here should be made much smarter, we currently have + * a bit of a stupid situation because drivers might want to register + * the rfkill struct under their own lock, and take this lock during + * rfkill method calls -- which will cause an AB-BA deadlock situation. + * + * To fix that, we need to rework this code here to be mostly lock-free + * and only use the mutex for list manipulations, not to protect the + * various other global variables. Then we can avoid holding the mutex + * around driver operations, and all is happy. + */ +static LIST_HEAD(rfkill_list); /* list of registered rf switches */ +static DEFINE_MUTEX(rfkill_global_mutex); + +static unsigned int rfkill_default_state = 1; +module_param_named(default_state, rfkill_default_state, uint, 0444); +MODULE_PARM_DESC(default_state, + "Default initial state for all radio types, 0 = radio off"); + +static struct { + bool cur, def; +} rfkill_global_states[NUM_RFKILL_TYPES]; + +static unsigned long rfkill_states_default_locked; + +static bool rfkill_epo_lock_active; + + +#ifdef CONFIG_RFKILL_LEDS +static void rfkill_led_trigger_event(struct rfkill *rfkill) +{ + struct led_trigger *trigger; + + if (!rfkill->registered) + return; + + trigger = &rfkill->led_trigger; + + if (rfkill->state & RFKILL_BLOCK_ANY) + led_trigger_event(trigger, LED_OFF); + else + led_trigger_event(trigger, LED_FULL); +} + +static void rfkill_led_trigger_activate(struct led_classdev *led) +{ + struct rfkill *rfkill; + + rfkill = container_of(led->trigger, struct rfkill, led_trigger); + + rfkill_led_trigger_event(rfkill); +} + +const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) +{ + return rfkill->led_trigger.name; +} +EXPORT_SYMBOL(rfkill_get_led_trigger_name); + +void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) +{ + BUG_ON(!rfkill); + + rfkill->ledtrigname = name; +} +EXPORT_SYMBOL(rfkill_set_led_trigger_name); + +static int rfkill_led_trigger_register(struct rfkill *rfkill) +{ + rfkill->led_trigger.name = rfkill->ledtrigname + ? : dev_name(&rfkill->dev); + rfkill->led_trigger.activate = rfkill_led_trigger_activate; + return led_trigger_register(&rfkill->led_trigger); +} + +static void rfkill_led_trigger_unregister(struct rfkill *rfkill) +{ + led_trigger_unregister(&rfkill->led_trigger); +} +#else +static void rfkill_led_trigger_event(struct rfkill *rfkill) +{ +} + +static inline int rfkill_led_trigger_register(struct rfkill *rfkill) +{ + return 0; +} + +static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill) +{ +} +#endif /* CONFIG_RFKILL_LEDS */ + +static void rfkill_uevent(struct rfkill *rfkill) +{ + if (!rfkill->registered || rfkill->suspended) + return; + + kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); +} + +static bool __rfkill_set_hw_state(struct rfkill *rfkill, + bool blocked, bool *change) +{ + unsigned long flags; + bool prev, any; + + BUG_ON(!rfkill); + + spin_lock_irqsave(&rfkill->lock, flags); + prev = !!(rfkill->state & RFKILL_BLOCK_HW); + if (blocked) + rfkill->state |= RFKILL_BLOCK_HW; + else + rfkill->state &= ~RFKILL_BLOCK_HW; + *change = prev != blocked; + any = rfkill->state & RFKILL_BLOCK_ANY; + spin_unlock_irqrestore(&rfkill->lock, flags); + + rfkill_led_trigger_event(rfkill); + + return any; +} + +/** + * rfkill_set_block - wrapper for set_block method + * + * @rfkill: the rfkill struct to use + * @blocked: the new software state + * + * Calls the set_block method (when applicable) and handles notifications + * etc. as well. + */ +static void rfkill_set_block(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + int err; + + /* + * Some platforms (...!) generate input events which affect the + * _hard_ kill state -- whenever something tries to change the + * current software state query the hardware state too. + */ + if (rfkill->ops->query) + rfkill->ops->query(rfkill, rfkill->data); + + spin_lock_irqsave(&rfkill->lock, flags); + if (rfkill->state & RFKILL_BLOCK_SW) + rfkill->state |= RFKILL_BLOCK_SW_PREV; + else + rfkill->state &= ~RFKILL_BLOCK_SW_PREV; + + if (blocked) + rfkill->state |= RFKILL_BLOCK_SW; + else + rfkill->state &= ~RFKILL_BLOCK_SW; + + rfkill->state |= RFKILL_BLOCK_SW_SETCALL; + spin_unlock_irqrestore(&rfkill->lock, flags); + + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return; + + err = rfkill->ops->set_block(rfkill->data, blocked); + + spin_lock_irqsave(&rfkill->lock, flags); + if (err) { + /* + * Failed -- reset status to _prev, this may be different + * from what set set _PREV to earlier in this function + * if rfkill_set_sw_state was invoked. + */ + if (rfkill->state & RFKILL_BLOCK_SW_PREV) + rfkill->state |= RFKILL_BLOCK_SW; + else + rfkill->state &= ~RFKILL_BLOCK_SW; + } + rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL; + rfkill->state &= ~RFKILL_BLOCK_SW_PREV; + spin_unlock_irqrestore(&rfkill->lock, flags); + + rfkill_led_trigger_event(rfkill); + rfkill_uevent(rfkill); +} + +/** + * __rfkill_switch_all - Toggle state of all switches of given type + * @type: type of interfaces to be affected + * @state: the new state + * + * This function sets the state of all switches of given type, + * unless a specific switch is claimed by userspace (in which case, + * that switch is left alone) or suspended. + * + * Caller must have acquired rfkill_global_mutex. + */ +static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) +{ + struct rfkill *rfkill; + + rfkill_global_states[type].cur = blocked; + list_for_each_entry(rfkill, &rfkill_list, node) { + if (rfkill->type != type) + continue; + + rfkill_set_block(rfkill, blocked); + } +} + +/** + * rfkill_switch_all - Toggle state of all switches of given type + * @type: type of interfaces to be affected + * @state: the new state + * + * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). + * Please refer to __rfkill_switch_all() for details. + * + * Does nothing if the EPO lock is active. + */ +void rfkill_switch_all(enum rfkill_type type, bool blocked) +{ + mutex_lock(&rfkill_global_mutex); + + if (!rfkill_epo_lock_active) + __rfkill_switch_all(type, blocked); + + mutex_unlock(&rfkill_global_mutex); +} + +/** + * rfkill_epo - emergency power off all transmitters + * + * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED, + * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex. + * + * The global state before the EPO is saved and can be restored later + * using rfkill_restore_states(). + */ +void rfkill_epo(void) +{ + struct rfkill *rfkill; + int i; + + mutex_lock(&rfkill_global_mutex); + + rfkill_epo_lock_active = true; + list_for_each_entry(rfkill, &rfkill_list, node) + rfkill_set_block(rfkill, true); + + for (i = 0; i < NUM_RFKILL_TYPES; i++) { + rfkill_global_states[i].def = rfkill_global_states[i].cur; + rfkill_global_states[i].cur = true; + } + mutex_unlock(&rfkill_global_mutex); +} + +/** + * rfkill_restore_states - restore global states + * + * Restore (and sync switches to) the global state from the + * states in rfkill_default_states. This can undo the effects of + * a call to rfkill_epo(). + */ +void rfkill_restore_states(void) +{ + int i; + + mutex_lock(&rfkill_global_mutex); + + rfkill_epo_lock_active = false; + for (i = 0; i < NUM_RFKILL_TYPES; i++) + __rfkill_switch_all(i, rfkill_global_states[i].def); + mutex_unlock(&rfkill_global_mutex); +} + +/** + * rfkill_remove_epo_lock - unlock state changes + * + * Used by rfkill-input manually unlock state changes, when + * the EPO switch is deactivated. + */ +void rfkill_remove_epo_lock(void) +{ + mutex_lock(&rfkill_global_mutex); + rfkill_epo_lock_active = false; + mutex_unlock(&rfkill_global_mutex); +} + +/** + * rfkill_is_epo_lock_active - returns true EPO is active + * + * Returns 0 (false) if there is NOT an active EPO contidion, + * and 1 (true) if there is an active EPO contition, which + * locks all radios in one of the BLOCKED states. + * + * Can be called in atomic context. + */ +bool rfkill_is_epo_lock_active(void) +{ + return rfkill_epo_lock_active; +} + +/** + * rfkill_get_global_sw_state - returns global state for a type + * @type: the type to get the global state of + * + * Returns the current global state for a given wireless + * device type. + */ +bool rfkill_get_global_sw_state(const enum rfkill_type type) +{ + return rfkill_global_states[type].cur; +} + +void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked) +{ + mutex_lock(&rfkill_global_mutex); + + /* don't allow unblock when epo */ + if (rfkill_epo_lock_active && !blocked) + goto out; + + /* too late */ + if (rfkill_states_default_locked & BIT(type)) + goto out; + + rfkill_states_default_locked |= BIT(type); + + rfkill_global_states[type].cur = blocked; + rfkill_global_states[type].def = blocked; + out: + mutex_unlock(&rfkill_global_mutex); +} +EXPORT_SYMBOL(rfkill_set_global_sw_state); + + +bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) +{ + bool ret, change; + + ret = __rfkill_set_hw_state(rfkill, blocked, &change); + + if (!rfkill->registered) + return ret; + + if (change) + schedule_work(&rfkill->uevent_work); + + return ret; +} +EXPORT_SYMBOL(rfkill_set_hw_state); + +static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) +{ + u32 bit = RFKILL_BLOCK_SW; + + /* if in a ops->set_block right now, use other bit */ + if (rfkill->state & RFKILL_BLOCK_SW_SETCALL) + bit = RFKILL_BLOCK_SW_PREV; + + if (blocked) + rfkill->state |= bit; + else + rfkill->state &= ~bit; +} + +bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + bool prev, hwblock; + + BUG_ON(!rfkill); + + spin_lock_irqsave(&rfkill->lock, flags); + prev = !!(rfkill->state & RFKILL_BLOCK_SW); + __rfkill_set_sw_state(rfkill, blocked); + hwblock = !!(rfkill->state & RFKILL_BLOCK_HW); + blocked = blocked || hwblock; + spin_unlock_irqrestore(&rfkill->lock, flags); + + if (!rfkill->registered) + return blocked; + + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); + + return blocked; +} +EXPORT_SYMBOL(rfkill_set_sw_state); + +void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) +{ + unsigned long flags; + bool swprev, hwprev; + + BUG_ON(!rfkill); + + spin_lock_irqsave(&rfkill->lock, flags); + + /* + * No need to care about prev/setblock ... this is for uevent only + * and that will get triggered by rfkill_set_block anyway. + */ + swprev = !!(rfkill->state & RFKILL_BLOCK_SW); + hwprev = !!(rfkill->state & RFKILL_BLOCK_HW); + __rfkill_set_sw_state(rfkill, sw); + + spin_unlock_irqrestore(&rfkill->lock, flags); + + if (!rfkill->registered) + return; + + if (swprev != sw || hwprev != hw) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); +} +EXPORT_SYMBOL(rfkill_set_states); + +static ssize_t rfkill_name_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%s\n", rfkill->name); +} + +static const char *rfkill_get_type_str(enum rfkill_type type) +{ + switch (type) { + case RFKILL_TYPE_WLAN: + return "wlan"; + case RFKILL_TYPE_BLUETOOTH: + return "bluetooth"; + case RFKILL_TYPE_UWB: + return "ultrawideband"; + case RFKILL_TYPE_WIMAX: + return "wimax"; + case RFKILL_TYPE_WWAN: + return "wwan"; + default: + BUG(); + } + + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_WWAN + 1); +} + +static ssize_t rfkill_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); +} + +static u8 user_state_from_blocked(unsigned long state) +{ + if (state & RFKILL_BLOCK_HW) + return RFKILL_USER_STATE_HARD_BLOCKED; + if (state & RFKILL_BLOCK_SW) + return RFKILL_USER_STATE_SOFT_BLOCKED; + + return RFKILL_USER_STATE_UNBLOCKED; +} + +static ssize_t rfkill_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long flags; + u32 state; + + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + + return sprintf(buf, "%d\n", user_state_from_blocked(state)); +} + +static ssize_t rfkill_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + /* + * The intention was that userspace can only take control over + * a given device when/if rfkill-input doesn't control it due + * to user_claim. Since user_claim is currently unsupported, + * we never support changing the state from userspace -- this + * can be implemented again later. + */ + + return -EPERM; +} + +static ssize_t rfkill_claim_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 0); +} + +static ssize_t rfkill_claim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return -EOPNOTSUPP; +} + +static struct device_attribute rfkill_dev_attrs[] = { + __ATTR(name, S_IRUGO, rfkill_name_show, NULL), + __ATTR(type, S_IRUGO, rfkill_type_show, NULL), + __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), + __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), + __ATTR_NULL +}; + +static void rfkill_release(struct device *dev) +{ + struct rfkill *rfkill = to_rfkill(dev); + + kfree(rfkill); +} + +static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long flags; + u32 state; + int error; + + error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); + if (error) + return error; + error = add_uevent_var(env, "RFKILL_TYPE=%s", + rfkill_get_type_str(rfkill->type)); + if (error) + return error; + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + error = add_uevent_var(env, "RFKILL_STATE=%d", + user_state_from_blocked(state)); + return error; +} + +void rfkill_pause_polling(struct rfkill *rfkill) +{ + BUG_ON(!rfkill); + + if (!rfkill->ops->poll) + return; + + cancel_delayed_work_sync(&rfkill->poll_work); +} +EXPORT_SYMBOL(rfkill_pause_polling); + +void rfkill_resume_polling(struct rfkill *rfkill) +{ + BUG_ON(!rfkill); + + if (!rfkill->ops->poll) + return; + + schedule_work(&rfkill->poll_work.work); +} +EXPORT_SYMBOL(rfkill_resume_polling); + +static int rfkill_suspend(struct device *dev, pm_message_t state) +{ + struct rfkill *rfkill = to_rfkill(dev); + + rfkill_pause_polling(rfkill); + + rfkill->suspended = true; + + return 0; +} + +static int rfkill_resume(struct device *dev) +{ + struct rfkill *rfkill = to_rfkill(dev); + bool cur; + + mutex_lock(&rfkill_global_mutex); + cur = rfkill_global_states[rfkill->type].cur; + rfkill_set_block(rfkill, cur); + mutex_unlock(&rfkill_global_mutex); + + rfkill->suspended = false; + + schedule_work(&rfkill->uevent_work); + + rfkill_resume_polling(rfkill); + + return 0; +} + +static struct class rfkill_class = { + .name = "rfkill", + .dev_release = rfkill_release, + .dev_attrs = rfkill_dev_attrs, + .dev_uevent = rfkill_dev_uevent, + .suspend = rfkill_suspend, + .resume = rfkill_resume, +}; + + +struct rfkill * __must_check rfkill_alloc(const char *name, + struct device *parent, + const enum rfkill_type type, + const struct rfkill_ops *ops, + void *ops_data) +{ + struct rfkill *rfkill; + struct device *dev; + + if (WARN_ON(!ops)) + return NULL; + + if (WARN_ON(!ops->set_block)) + return NULL; + + if (WARN_ON(!name)) + return NULL; + + if (WARN_ON(type >= NUM_RFKILL_TYPES)) + return NULL; + + rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); + if (!rfkill) + return NULL; + + spin_lock_init(&rfkill->lock); + INIT_LIST_HEAD(&rfkill->node); + rfkill->type = type; + rfkill->name = name; + rfkill->ops = ops; + rfkill->data = ops_data; + + dev = &rfkill->dev; + dev->class = &rfkill_class; + dev->parent = parent; + device_initialize(dev); + + return rfkill; +} +EXPORT_SYMBOL(rfkill_alloc); + +static void rfkill_poll(struct work_struct *work) +{ + struct rfkill *rfkill; + + rfkill = container_of(work, struct rfkill, poll_work.work); + + /* + * Poll hardware state -- driver will use one of the + * rfkill_set{,_hw,_sw}_state functions and use its + * return value to update the current status. + */ + rfkill->ops->poll(rfkill, rfkill->data); + + schedule_delayed_work(&rfkill->poll_work, + round_jiffies_relative(POLL_INTERVAL)); +} + +static void rfkill_uevent_work(struct work_struct *work) +{ + struct rfkill *rfkill; + + rfkill = container_of(work, struct rfkill, uevent_work); + + rfkill_uevent(rfkill); +} + +static void rfkill_sync_work(struct work_struct *work) +{ + struct rfkill *rfkill; + bool cur; + + rfkill = container_of(work, struct rfkill, sync_work); + + mutex_lock(&rfkill_global_mutex); + cur = rfkill_global_states[rfkill->type].cur; + rfkill_set_block(rfkill, cur); + mutex_unlock(&rfkill_global_mutex); +} + +int __must_check rfkill_register(struct rfkill *rfkill) +{ + static unsigned long rfkill_no; + struct device *dev = &rfkill->dev; + int error; + + BUG_ON(!rfkill); + + mutex_lock(&rfkill_global_mutex); + + if (rfkill->registered) { + error = -EALREADY; + goto unlock; + } + + dev_set_name(dev, "rfkill%lu", rfkill_no); + rfkill_no++; + + if (!(rfkill_states_default_locked & BIT(rfkill->type))) { + /* first of its kind */ + BUILD_BUG_ON(NUM_RFKILL_TYPES > + sizeof(rfkill_states_default_locked) * 8); + rfkill_states_default_locked |= BIT(rfkill->type); + rfkill_global_states[rfkill->type].cur = + rfkill_global_states[rfkill->type].def; + } + + list_add_tail(&rfkill->node, &rfkill_list); + + error = device_add(dev); + if (error) + goto remove; + + error = rfkill_led_trigger_register(rfkill); + if (error) + goto devdel; + + rfkill->registered = true; + + if (rfkill->ops->poll) { + INIT_DELAYED_WORK(&rfkill->poll_work, rfkill_poll); + schedule_delayed_work(&rfkill->poll_work, + round_jiffies_relative(POLL_INTERVAL)); + } + + INIT_WORK(&rfkill->uevent_work, rfkill_uevent_work); + + INIT_WORK(&rfkill->sync_work, rfkill_sync_work); + schedule_work(&rfkill->sync_work); + + mutex_unlock(&rfkill_global_mutex); + return 0; + + devdel: + device_del(&rfkill->dev); + remove: + list_del_init(&rfkill->node); + unlock: + mutex_unlock(&rfkill_global_mutex); + return error; +} +EXPORT_SYMBOL(rfkill_register); + +void rfkill_unregister(struct rfkill *rfkill) +{ + BUG_ON(!rfkill); + + if (rfkill->ops->poll) + cancel_delayed_work_sync(&rfkill->poll_work); + + cancel_work_sync(&rfkill->uevent_work); + cancel_work_sync(&rfkill->sync_work); + + rfkill->registered = false; + + device_del(&rfkill->dev); + + mutex_lock(&rfkill_global_mutex); + list_del_init(&rfkill->node); + mutex_unlock(&rfkill_global_mutex); + + rfkill_led_trigger_unregister(rfkill); +} +EXPORT_SYMBOL(rfkill_unregister); + +void rfkill_destroy(struct rfkill *rfkill) +{ + if (rfkill) + put_device(&rfkill->dev); +} +EXPORT_SYMBOL(rfkill_destroy); + + +static int __init rfkill_init(void) +{ + int error; + int i; + + for (i = 0; i < NUM_RFKILL_TYPES; i++) + rfkill_global_states[i].def = !rfkill_default_state; + + error = class_register(&rfkill_class); + if (error) + goto out; + +#ifdef CONFIG_RFKILL_INPUT + error = rfkill_handler_init(); + if (error) + class_unregister(&rfkill_class); +#endif + + out: + return error; +} +subsys_initcall(rfkill_init); + +static void __exit rfkill_exit(void) +{ +#ifdef CONFIG_RFKILL_INPUT + rfkill_handler_exit(); +#endif + class_unregister(&rfkill_class); +} +module_exit(rfkill_exit); diff --git a/net/rfkill/input.c b/net/rfkill/input.c new file mode 100644 index 00000000000..a7295ad5f9c --- /dev/null +++ b/net/rfkill/input.c @@ -0,0 +1,342 @@ +/* + * Input layer to RF Kill interface connector + * + * Copyright (c) 2007 Dmitry Torokhov + * Copyright 2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * If you ever run into a situation in which you have a SW_ type rfkill + * input device, then you can revive code that was removed in the patch + * "rfkill-input: remove unused code". + */ + +#include +#include +#include +#include +#include +#include + +#include "rfkill.h" + +enum rfkill_input_master_mode { + RFKILL_INPUT_MASTER_UNLOCK = 0, + RFKILL_INPUT_MASTER_RESTORE = 1, + RFKILL_INPUT_MASTER_UNBLOCKALL = 2, + NUM_RFKILL_INPUT_MASTER_MODES +}; + +/* Delay (in ms) between consecutive switch ops */ +#define RFKILL_OPS_DELAY 200 + +static enum rfkill_input_master_mode rfkill_master_switch_mode = + RFKILL_INPUT_MASTER_UNBLOCKALL; +module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0); +MODULE_PARM_DESC(master_switch_mode, + "SW_RFKILL_ALL ON should: 0=do nothing (only unlock); 1=restore; 2=unblock all"); + +static spinlock_t rfkill_op_lock; +static bool rfkill_op_pending; +static unsigned long rfkill_sw_pending[BITS_TO_LONGS(NUM_RFKILL_TYPES)]; +static unsigned long rfkill_sw_state[BITS_TO_LONGS(NUM_RFKILL_TYPES)]; + +enum rfkill_sched_op { + RFKILL_GLOBAL_OP_EPO = 0, + RFKILL_GLOBAL_OP_RESTORE, + RFKILL_GLOBAL_OP_UNLOCK, + RFKILL_GLOBAL_OP_UNBLOCK, +}; + +static enum rfkill_sched_op rfkill_master_switch_op; +static enum rfkill_sched_op rfkill_op; + +static void __rfkill_handle_global_op(enum rfkill_sched_op op) +{ + unsigned int i; + + switch (op) { + case RFKILL_GLOBAL_OP_EPO: + rfkill_epo(); + break; + case RFKILL_GLOBAL_OP_RESTORE: + rfkill_restore_states(); + break; + case RFKILL_GLOBAL_OP_UNLOCK: + rfkill_remove_epo_lock(); + break; + case RFKILL_GLOBAL_OP_UNBLOCK: + rfkill_remove_epo_lock(); + for (i = 0; i < NUM_RFKILL_TYPES; i++) + rfkill_switch_all(i, false); + break; + default: + /* memory corruption or bug, fail safely */ + rfkill_epo(); + WARN(1, "Unknown requested operation %d! " + "rfkill Emergency Power Off activated\n", + op); + } +} + +static void __rfkill_handle_normal_op(const enum rfkill_type type, + const bool complement) +{ + bool blocked; + + blocked = rfkill_get_global_sw_state(type); + if (complement) + blocked = !blocked; + + rfkill_switch_all(type, blocked); +} + +static void rfkill_op_handler(struct work_struct *work) +{ + unsigned int i; + bool c; + + spin_lock_irq(&rfkill_op_lock); + do { + if (rfkill_op_pending) { + enum rfkill_sched_op op = rfkill_op; + rfkill_op_pending = false; + memset(rfkill_sw_pending, 0, + sizeof(rfkill_sw_pending)); + spin_unlock_irq(&rfkill_op_lock); + + __rfkill_handle_global_op(op); + + spin_lock_irq(&rfkill_op_lock); + + /* + * handle global ops first -- during unlocked period + * we might have gotten a new global op. + */ + if (rfkill_op_pending) + continue; + } + + if (rfkill_is_epo_lock_active()) + continue; + + for (i = 0; i < NUM_RFKILL_TYPES; i++) { + if (__test_and_clear_bit(i, rfkill_sw_pending)) { + c = __test_and_clear_bit(i, rfkill_sw_state); + spin_unlock_irq(&rfkill_op_lock); + + __rfkill_handle_normal_op(i, c); + + spin_lock_irq(&rfkill_op_lock); + } + } + } while (rfkill_op_pending); + spin_unlock_irq(&rfkill_op_lock); +} + +static DECLARE_DELAYED_WORK(rfkill_op_work, rfkill_op_handler); +static unsigned long rfkill_last_scheduled; + +static unsigned long rfkill_ratelimit(const unsigned long last) +{ + const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); + return (time_after(jiffies, last + delay)) ? 0 : delay; +} + +static void rfkill_schedule_ratelimited(void) +{ + if (delayed_work_pending(&rfkill_op_work)) + return; + schedule_delayed_work(&rfkill_op_work, + rfkill_ratelimit(rfkill_last_scheduled)); + rfkill_last_scheduled = jiffies; +} + +static void rfkill_schedule_global_op(enum rfkill_sched_op op) +{ + unsigned long flags; + + spin_lock_irqsave(&rfkill_op_lock, flags); + rfkill_op = op; + rfkill_op_pending = true; + if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { + /* bypass the limiter for EPO */ + cancel_delayed_work(&rfkill_op_work); + schedule_delayed_work(&rfkill_op_work, 0); + rfkill_last_scheduled = jiffies; + } else + rfkill_schedule_ratelimited(); + spin_unlock_irqrestore(&rfkill_op_lock, flags); +} + +static void rfkill_schedule_toggle(enum rfkill_type type) +{ + unsigned long flags; + + if (rfkill_is_epo_lock_active()) + return; + + spin_lock_irqsave(&rfkill_op_lock, flags); + if (!rfkill_op_pending) { + __set_bit(type, rfkill_sw_pending); + __change_bit(type, rfkill_sw_state); + rfkill_schedule_ratelimited(); + } + spin_unlock_irqrestore(&rfkill_op_lock, flags); +} + +static void rfkill_schedule_evsw_rfkillall(int state) +{ + if (state) + rfkill_schedule_global_op(rfkill_master_switch_op); + else + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO); +} + +static void rfkill_event(struct input_handle *handle, unsigned int type, + unsigned int code, int data) +{ + if (type == EV_KEY && data == 1) { + switch (code) { + case KEY_WLAN: + rfkill_schedule_toggle(RFKILL_TYPE_WLAN); + break; + case KEY_BLUETOOTH: + rfkill_schedule_toggle(RFKILL_TYPE_BLUETOOTH); + break; + case KEY_UWB: + rfkill_schedule_toggle(RFKILL_TYPE_UWB); + break; + case KEY_WIMAX: + rfkill_schedule_toggle(RFKILL_TYPE_WIMAX); + break; + } + } else if (type == EV_SW && code == SW_RFKILL_ALL) + rfkill_schedule_evsw_rfkillall(data); +} + +static int rfkill_connect(struct input_handler *handler, struct input_dev *dev, + const struct input_device_id *id) +{ + struct input_handle *handle; + int error; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "rfkill"; + + /* causes rfkill_start() to be called */ + error = input_register_handle(handle); + if (error) + goto err_free_handle; + + error = input_open_device(handle); + if (error) + goto err_unregister_handle; + + return 0; + + err_unregister_handle: + input_unregister_handle(handle); + err_free_handle: + kfree(handle); + return error; +} + +static void rfkill_start(struct input_handle *handle) +{ + /* + * Take event_lock to guard against configuration changes, we + * should be able to deal with concurrency with rfkill_event() + * just fine (which event_lock will also avoid). + */ + spin_lock_irq(&handle->dev->event_lock); + + if (test_bit(EV_SW, handle->dev->evbit) && + test_bit(SW_RFKILL_ALL, handle->dev->swbit)) + rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL, + handle->dev->sw)); + + spin_unlock_irq(&handle->dev->event_lock); +} + +static void rfkill_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id rfkill_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, + }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, + .evbit = { BIT(EV_SW) }, + .swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) }, + }, + { } +}; + +static struct input_handler rfkill_handler = { + .name = "rfkill", + .event = rfkill_event, + .connect = rfkill_connect, + .start = rfkill_start, + .disconnect = rfkill_disconnect, + .id_table = rfkill_ids, +}; + +int __init rfkill_handler_init(void) +{ + switch (rfkill_master_switch_mode) { + case RFKILL_INPUT_MASTER_UNBLOCKALL: + rfkill_master_switch_op = RFKILL_GLOBAL_OP_UNBLOCK; + break; + case RFKILL_INPUT_MASTER_RESTORE: + rfkill_master_switch_op = RFKILL_GLOBAL_OP_RESTORE; + break; + case RFKILL_INPUT_MASTER_UNLOCK: + rfkill_master_switch_op = RFKILL_GLOBAL_OP_UNLOCK; + break; + default: + return -EINVAL; + } + + spin_lock_init(&rfkill_op_lock); + + /* Avoid delay at first schedule */ + rfkill_last_scheduled = + jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1; + return input_register_handler(&rfkill_handler); +} + +void __exit rfkill_handler_exit(void) +{ + input_unregister_handler(&rfkill_handler); + cancel_delayed_work_sync(&rfkill_op_work); +} diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c deleted file mode 100644 index 60a34f3b5f6..00000000000 --- a/net/rfkill/rfkill-input.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Input layer to RF Kill interface connector - * - * Copyright (c) 2007 Dmitry Torokhov - */ - -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "rfkill-input.h" - -MODULE_AUTHOR("Dmitry Torokhov "); -MODULE_DESCRIPTION("Input layer to RF switch connector"); -MODULE_LICENSE("GPL"); - -enum rfkill_input_master_mode { - RFKILL_INPUT_MASTER_DONOTHING = 0, - RFKILL_INPUT_MASTER_RESTORE = 1, - RFKILL_INPUT_MASTER_UNBLOCKALL = 2, - RFKILL_INPUT_MASTER_MAX, /* marker */ -}; - -/* Delay (in ms) between consecutive switch ops */ -#define RFKILL_OPS_DELAY 200 - -static enum rfkill_input_master_mode rfkill_master_switch_mode = - RFKILL_INPUT_MASTER_UNBLOCKALL; -module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0); -MODULE_PARM_DESC(master_switch_mode, - "SW_RFKILL_ALL ON should: 0=do nothing; 1=restore; 2=unblock all"); - -enum rfkill_global_sched_op { - RFKILL_GLOBAL_OP_EPO = 0, - RFKILL_GLOBAL_OP_RESTORE, - RFKILL_GLOBAL_OP_UNLOCK, - RFKILL_GLOBAL_OP_UNBLOCK, -}; - -struct rfkill_task { - struct delayed_work dwork; - - /* ensures that task is serialized */ - struct mutex mutex; - - /* protects everything below */ - spinlock_t lock; - - /* pending regular switch operations (1=pending) */ - unsigned long sw_pending[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; - - /* should the state be complemented (1=yes) */ - unsigned long sw_togglestate[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; - - bool global_op_pending; - enum rfkill_global_sched_op op; - - /* last time it was scheduled */ - unsigned long last_scheduled; -}; - -static void __rfkill_handle_global_op(enum rfkill_global_sched_op op) -{ - unsigned int i; - - switch (op) { - case RFKILL_GLOBAL_OP_EPO: - rfkill_epo(); - break; - case RFKILL_GLOBAL_OP_RESTORE: - rfkill_restore_states(); - break; - case RFKILL_GLOBAL_OP_UNLOCK: - rfkill_remove_epo_lock(); - break; - case RFKILL_GLOBAL_OP_UNBLOCK: - rfkill_remove_epo_lock(); - for (i = 0; i < RFKILL_TYPE_MAX; i++) - rfkill_switch_all(i, RFKILL_STATE_UNBLOCKED); - break; - default: - /* memory corruption or bug, fail safely */ - rfkill_epo(); - WARN(1, "Unknown requested operation %d! " - "rfkill Emergency Power Off activated\n", - op); - } -} - -static void __rfkill_handle_normal_op(const enum rfkill_type type, - const bool c) -{ - enum rfkill_state state; - - state = rfkill_get_global_state(type); - if (c) - state = rfkill_state_complement(state); - - rfkill_switch_all(type, state); -} - -static void rfkill_task_handler(struct work_struct *work) -{ - struct rfkill_task *task = container_of(work, - struct rfkill_task, dwork.work); - bool doit = true; - - mutex_lock(&task->mutex); - - spin_lock_irq(&task->lock); - while (doit) { - if (task->global_op_pending) { - enum rfkill_global_sched_op op = task->op; - task->global_op_pending = false; - memset(task->sw_pending, 0, sizeof(task->sw_pending)); - spin_unlock_irq(&task->lock); - - __rfkill_handle_global_op(op); - - /* make sure we do at least one pass with - * !task->global_op_pending */ - spin_lock_irq(&task->lock); - continue; - } else if (!rfkill_is_epo_lock_active()) { - unsigned int i = 0; - - while (!task->global_op_pending && - i < RFKILL_TYPE_MAX) { - if (test_and_clear_bit(i, task->sw_pending)) { - bool c; - c = test_and_clear_bit(i, - task->sw_togglestate); - spin_unlock_irq(&task->lock); - - __rfkill_handle_normal_op(i, c); - - spin_lock_irq(&task->lock); - } - i++; - } - } - doit = task->global_op_pending; - } - spin_unlock_irq(&task->lock); - - mutex_unlock(&task->mutex); -} - -static struct rfkill_task rfkill_task = { - .dwork = __DELAYED_WORK_INITIALIZER(rfkill_task.dwork, - rfkill_task_handler), - .mutex = __MUTEX_INITIALIZER(rfkill_task.mutex), - .lock = __SPIN_LOCK_UNLOCKED(rfkill_task.lock), -}; - -static unsigned long rfkill_ratelimit(const unsigned long last) -{ - const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); - return (time_after(jiffies, last + delay)) ? 0 : delay; -} - -static void rfkill_schedule_ratelimited(void) -{ - if (!delayed_work_pending(&rfkill_task.dwork)) { - schedule_delayed_work(&rfkill_task.dwork, - rfkill_ratelimit(rfkill_task.last_scheduled)); - rfkill_task.last_scheduled = jiffies; - } -} - -static void rfkill_schedule_global_op(enum rfkill_global_sched_op op) -{ - unsigned long flags; - - spin_lock_irqsave(&rfkill_task.lock, flags); - rfkill_task.op = op; - rfkill_task.global_op_pending = true; - if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { - /* bypass the limiter for EPO */ - cancel_delayed_work(&rfkill_task.dwork); - schedule_delayed_work(&rfkill_task.dwork, 0); - rfkill_task.last_scheduled = jiffies; - } else - rfkill_schedule_ratelimited(); - spin_unlock_irqrestore(&rfkill_task.lock, flags); -} - -static void rfkill_schedule_toggle(enum rfkill_type type) -{ - unsigned long flags; - - if (rfkill_is_epo_lock_active()) - return; - - spin_lock_irqsave(&rfkill_task.lock, flags); - if (!rfkill_task.global_op_pending) { - set_bit(type, rfkill_task.sw_pending); - change_bit(type, rfkill_task.sw_togglestate); - rfkill_schedule_ratelimited(); - } - spin_unlock_irqrestore(&rfkill_task.lock, flags); -} - -static void rfkill_schedule_evsw_rfkillall(int state) -{ - if (state) { - switch (rfkill_master_switch_mode) { - case RFKILL_INPUT_MASTER_UNBLOCKALL: - rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNBLOCK); - break; - case RFKILL_INPUT_MASTER_RESTORE: - rfkill_schedule_global_op(RFKILL_GLOBAL_OP_RESTORE); - break; - case RFKILL_INPUT_MASTER_DONOTHING: - rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNLOCK); - break; - default: - /* memory corruption or driver bug! fail safely */ - rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO); - WARN(1, "Unknown rfkill_master_switch_mode (%d), " - "driver bug or memory corruption detected!\n", - rfkill_master_switch_mode); - break; - } - } else - rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO); -} - -static void rfkill_event(struct input_handle *handle, unsigned int type, - unsigned int code, int data) -{ - if (type == EV_KEY && data == 1) { - enum rfkill_type t; - - switch (code) { - case KEY_WLAN: - t = RFKILL_TYPE_WLAN; - break; - case KEY_BLUETOOTH: - t = RFKILL_TYPE_BLUETOOTH; - break; - case KEY_UWB: - t = RFKILL_TYPE_UWB; - break; - case KEY_WIMAX: - t = RFKILL_TYPE_WIMAX; - break; - default: - return; - } - rfkill_schedule_toggle(t); - return; - } else if (type == EV_SW) { - switch (code) { - case SW_RFKILL_ALL: - rfkill_schedule_evsw_rfkillall(data); - return; - default: - return; - } - } -} - -static int rfkill_connect(struct input_handler *handler, struct input_dev *dev, - const struct input_device_id *id) -{ - struct input_handle *handle; - int error; - - handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->dev = dev; - handle->handler = handler; - handle->name = "rfkill"; - - /* causes rfkill_start() to be called */ - error = input_register_handle(handle); - if (error) - goto err_free_handle; - - error = input_open_device(handle); - if (error) - goto err_unregister_handle; - - return 0; - - err_unregister_handle: - input_unregister_handle(handle); - err_free_handle: - kfree(handle); - return error; -} - -static void rfkill_start(struct input_handle *handle) -{ - /* Take event_lock to guard against configuration changes, we - * should be able to deal with concurrency with rfkill_event() - * just fine (which event_lock will also avoid). */ - spin_lock_irq(&handle->dev->event_lock); - - if (test_bit(EV_SW, handle->dev->evbit)) { - if (test_bit(SW_RFKILL_ALL, handle->dev->swbit)) - rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL, - handle->dev->sw)); - /* add resync for further EV_SW events here */ - } - - spin_unlock_irq(&handle->dev->event_lock); -} - -static void rfkill_disconnect(struct input_handle *handle) -{ - input_close_device(handle); - input_unregister_handle(handle); - kfree(handle); -} - -static const struct input_device_id rfkill_ids[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) }, - }, - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) }, - }, - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) }, - }, - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, - }, - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, - .evbit = { BIT(EV_SW) }, - .swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) }, - }, - { } -}; - -static struct input_handler rfkill_handler = { - .event = rfkill_event, - .connect = rfkill_connect, - .disconnect = rfkill_disconnect, - .start = rfkill_start, - .name = "rfkill", - .id_table = rfkill_ids, -}; - -static int __init rfkill_handler_init(void) -{ - if (rfkill_master_switch_mode >= RFKILL_INPUT_MASTER_MAX) - return -EINVAL; - - /* - * The penalty to not doing this is a possible RFKILL_OPS_DELAY delay - * at the first use. Acceptable, but if we can avoid it, why not? - */ - rfkill_task.last_scheduled = - jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1; - return input_register_handler(&rfkill_handler); -} - -static void __exit rfkill_handler_exit(void) -{ - input_unregister_handler(&rfkill_handler); - cancel_delayed_work_sync(&rfkill_task.dwork); - rfkill_remove_epo_lock(); -} - -module_init(rfkill_handler_init); -module_exit(rfkill_handler_exit); diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h deleted file mode 100644 index fe8df6b5b93..00000000000 --- a/net/rfkill/rfkill-input.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2007 Ivo van Doorn - */ - -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ - -#ifndef __RFKILL_INPUT_H -#define __RFKILL_INPUT_H - -void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); -void rfkill_epo(void); -void rfkill_restore_states(void); -void rfkill_remove_epo_lock(void); -bool rfkill_is_epo_lock_active(void); -enum rfkill_state rfkill_get_global_state(const enum rfkill_type type); - -#endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c deleted file mode 100644 index 4f5a83183c9..00000000000 --- a/net/rfkill/rfkill.c +++ /dev/null @@ -1,855 +0,0 @@ -/* - * Copyright (C) 2006 - 2007 Ivo van Doorn - * Copyright (C) 2007 Dmitry Torokhov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* Get declaration of rfkill_switch_all() to shut up sparse. */ -#include "rfkill-input.h" - - -MODULE_AUTHOR("Ivo van Doorn "); -MODULE_VERSION("1.0"); -MODULE_DESCRIPTION("RF switch support"); -MODULE_LICENSE("GPL"); - -static LIST_HEAD(rfkill_list); /* list of registered rf switches */ -static DEFINE_MUTEX(rfkill_global_mutex); - -static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED; -module_param_named(default_state, rfkill_default_state, uint, 0444); -MODULE_PARM_DESC(default_state, - "Default initial state for all radio types, 0 = radio off"); - -struct rfkill_gsw_state { - enum rfkill_state current_state; - enum rfkill_state default_state; -}; - -static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX]; -static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; -static bool rfkill_epo_lock_active; - - -#ifdef CONFIG_RFKILL_LEDS -static void rfkill_led_trigger(struct rfkill *rfkill, - enum rfkill_state state) -{ - struct led_trigger *led = &rfkill->led_trigger; - - if (!led->name) - return; - if (state != RFKILL_STATE_UNBLOCKED) - led_trigger_event(led, LED_OFF); - else - led_trigger_event(led, LED_FULL); -} - -static void rfkill_led_trigger_activate(struct led_classdev *led) -{ - struct rfkill *rfkill = container_of(led->trigger, - struct rfkill, led_trigger); - - rfkill_led_trigger(rfkill, rfkill->state); -} -#else -static inline void rfkill_led_trigger(struct rfkill *rfkill, - enum rfkill_state state) -{ -} -#endif /* CONFIG_RFKILL_LEDS */ - -static void rfkill_uevent(struct rfkill *rfkill) -{ - kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); -} - -static void update_rfkill_state(struct rfkill *rfkill) -{ - enum rfkill_state newstate, oldstate; - - if (rfkill->get_state) { - mutex_lock(&rfkill->mutex); - if (!rfkill->get_state(rfkill->data, &newstate)) { - oldstate = rfkill->state; - rfkill->state = newstate; - if (oldstate != newstate) - rfkill_uevent(rfkill); - } - mutex_unlock(&rfkill->mutex); - } - rfkill_led_trigger(rfkill, rfkill->state); -} - -/** - * rfkill_toggle_radio - wrapper for toggle_radio hook - * @rfkill: the rfkill struct to use - * @force: calls toggle_radio even if cache says it is not needed, - * and also makes sure notifications of the state will be - * sent even if it didn't change - * @state: the new state to call toggle_radio() with - * - * Calls rfkill->toggle_radio, enforcing the API for toggle_radio - * calls and handling all the red tape such as issuing notifications - * if the call is successful. - * - * Suspended devices are not touched at all, and -EAGAIN is returned. - * - * Note that the @force parameter cannot override a (possibly cached) - * state of RFKILL_STATE_HARD_BLOCKED. Any device making use of - * RFKILL_STATE_HARD_BLOCKED implements either get_state() or - * rfkill_force_state(), so the cache either is bypassed or valid. - * - * Note that we do call toggle_radio for RFKILL_STATE_SOFT_BLOCKED - * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to - * give the driver a hint that it should double-BLOCK the transmitter. - * - * Caller must have acquired rfkill->mutex. - */ -static int rfkill_toggle_radio(struct rfkill *rfkill, - enum rfkill_state state, - int force) -{ - int retval = 0; - enum rfkill_state oldstate, newstate; - - if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) - return -EBUSY; - - oldstate = rfkill->state; - - if (rfkill->get_state && !force && - !rfkill->get_state(rfkill->data, &newstate)) { - rfkill->state = newstate; - } - - switch (state) { - case RFKILL_STATE_HARD_BLOCKED: - /* typically happens when refreshing hardware state, - * such as on resume */ - state = RFKILL_STATE_SOFT_BLOCKED; - break; - case RFKILL_STATE_UNBLOCKED: - /* force can't override this, only rfkill_force_state() can */ - if (rfkill->state == RFKILL_STATE_HARD_BLOCKED) - return -EPERM; - break; - case RFKILL_STATE_SOFT_BLOCKED: - /* nothing to do, we want to give drivers the hint to double - * BLOCK even a transmitter that is already in state - * RFKILL_STATE_HARD_BLOCKED */ - break; - default: - WARN(1, KERN_WARNING - "rfkill: illegal state %d passed as parameter " - "to rfkill_toggle_radio\n", state); - return -EINVAL; - } - - if (force || state != rfkill->state) { - retval = rfkill->toggle_radio(rfkill->data, state); - /* never allow a HARD->SOFT downgrade! */ - if (!retval && rfkill->state != RFKILL_STATE_HARD_BLOCKED) - rfkill->state = state; - } - - if (force || rfkill->state != oldstate) - rfkill_uevent(rfkill); - - rfkill_led_trigger(rfkill, rfkill->state); - return retval; -} - -/** - * __rfkill_switch_all - Toggle state of all switches of given type - * @type: type of interfaces to be affected - * @state: the new state - * - * This function toggles the state of all switches of given type, - * unless a specific switch is claimed by userspace (in which case, - * that switch is left alone) or suspended. - * - * Caller must have acquired rfkill_global_mutex. - */ -static void __rfkill_switch_all(const enum rfkill_type type, - const enum rfkill_state state) -{ - struct rfkill *rfkill; - - if (WARN((state >= RFKILL_STATE_MAX || type >= RFKILL_TYPE_MAX), - KERN_WARNING - "rfkill: illegal state %d or type %d " - "passed as parameter to __rfkill_switch_all\n", - state, type)) - return; - - rfkill_global_states[type].current_state = state; - list_for_each_entry(rfkill, &rfkill_list, node) { - if (rfkill->type == type) { - mutex_lock(&rfkill->mutex); - rfkill_toggle_radio(rfkill, state, 0); - mutex_unlock(&rfkill->mutex); - rfkill_led_trigger(rfkill, rfkill->state); - } - } -} - -/** - * rfkill_switch_all - Toggle state of all switches of given type - * @type: type of interfaces to be affected - * @state: the new state - * - * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). - * Please refer to __rfkill_switch_all() for details. - * - * Does nothing if the EPO lock is active. - */ -void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) -{ - mutex_lock(&rfkill_global_mutex); - if (!rfkill_epo_lock_active) - __rfkill_switch_all(type, state); - mutex_unlock(&rfkill_global_mutex); -} -EXPORT_SYMBOL(rfkill_switch_all); - -/** - * rfkill_epo - emergency power off all transmitters - * - * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED, - * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex. - * - * The global state before the EPO is saved and can be restored later - * using rfkill_restore_states(). - */ -void rfkill_epo(void) -{ - struct rfkill *rfkill; - int i; - - mutex_lock(&rfkill_global_mutex); - - rfkill_epo_lock_active = true; - list_for_each_entry(rfkill, &rfkill_list, node) { - mutex_lock(&rfkill->mutex); - rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); - mutex_unlock(&rfkill->mutex); - } - for (i = 0; i < RFKILL_TYPE_MAX; i++) { - rfkill_global_states[i].default_state = - rfkill_global_states[i].current_state; - rfkill_global_states[i].current_state = - RFKILL_STATE_SOFT_BLOCKED; - } - mutex_unlock(&rfkill_global_mutex); - rfkill_led_trigger(rfkill, rfkill->state); -} -EXPORT_SYMBOL_GPL(rfkill_epo); - -/** - * rfkill_restore_states - restore global states - * - * Restore (and sync switches to) the global state from the - * states in rfkill_default_states. This can undo the effects of - * a call to rfkill_epo(). - */ -void rfkill_restore_states(void) -{ - int i; - - mutex_lock(&rfkill_global_mutex); - - rfkill_epo_lock_active = false; - for (i = 0; i < RFKILL_TYPE_MAX; i++) - __rfkill_switch_all(i, rfkill_global_states[i].default_state); - mutex_unlock(&rfkill_global_mutex); -} -EXPORT_SYMBOL_GPL(rfkill_restore_states); - -/** - * rfkill_remove_epo_lock - unlock state changes - * - * Used by rfkill-input manually unlock state changes, when - * the EPO switch is deactivated. - */ -void rfkill_remove_epo_lock(void) -{ - mutex_lock(&rfkill_global_mutex); - rfkill_epo_lock_active = false; - mutex_unlock(&rfkill_global_mutex); -} -EXPORT_SYMBOL_GPL(rfkill_remove_epo_lock); - -/** - * rfkill_is_epo_lock_active - returns true EPO is active - * - * Returns 0 (false) if there is NOT an active EPO contidion, - * and 1 (true) if there is an active EPO contition, which - * locks all radios in one of the BLOCKED states. - * - * Can be called in atomic context. - */ -bool rfkill_is_epo_lock_active(void) -{ - return rfkill_epo_lock_active; -} -EXPORT_SYMBOL_GPL(rfkill_is_epo_lock_active); - -/** - * rfkill_get_global_state - returns global state for a type - * @type: the type to get the global state of - * - * Returns the current global state for a given wireless - * device type. - */ -enum rfkill_state rfkill_get_global_state(const enum rfkill_type type) -{ - return rfkill_global_states[type].current_state; -} -EXPORT_SYMBOL_GPL(rfkill_get_global_state); - -/** - * rfkill_force_state - Force the internal rfkill radio state - * @rfkill: pointer to the rfkill class to modify. - * @state: the current radio state the class should be forced to. - * - * This function updates the internal state of the radio cached - * by the rfkill class. It should be used when the driver gets - * a notification by the firmware/hardware of the current *real* - * state of the radio rfkill switch. - * - * Devices which are subject to external changes on their rfkill - * state (such as those caused by a hardware rfkill line) MUST - * have their driver arrange to call rfkill_force_state() as soon - * as possible after such a change. - * - * This function may not be called from an atomic context. - */ -int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) -{ - enum rfkill_state oldstate; - - BUG_ON(!rfkill); - if (WARN((state >= RFKILL_STATE_MAX), - KERN_WARNING - "rfkill: illegal state %d passed as parameter " - "to rfkill_force_state\n", state)) - return -EINVAL; - - mutex_lock(&rfkill->mutex); - - oldstate = rfkill->state; - rfkill->state = state; - - if (state != oldstate) - rfkill_uevent(rfkill); - - mutex_unlock(&rfkill->mutex); - rfkill_led_trigger(rfkill, rfkill->state); - - return 0; -} -EXPORT_SYMBOL(rfkill_force_state); - -static ssize_t rfkill_name_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rfkill *rfkill = to_rfkill(dev); - - return sprintf(buf, "%s\n", rfkill->name); -} - -static const char *rfkill_get_type_str(enum rfkill_type type) -{ - switch (type) { - case RFKILL_TYPE_WLAN: - return "wlan"; - case RFKILL_TYPE_BLUETOOTH: - return "bluetooth"; - case RFKILL_TYPE_UWB: - return "ultrawideband"; - case RFKILL_TYPE_WIMAX: - return "wimax"; - case RFKILL_TYPE_WWAN: - return "wwan"; - default: - BUG(); - } -} - -static ssize_t rfkill_type_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rfkill *rfkill = to_rfkill(dev); - - return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); -} - -static ssize_t rfkill_state_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rfkill *rfkill = to_rfkill(dev); - - update_rfkill_state(rfkill); - return sprintf(buf, "%d\n", rfkill->state); -} - -static ssize_t rfkill_state_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct rfkill *rfkill = to_rfkill(dev); - unsigned long state; - int error; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - error = strict_strtoul(buf, 0, &state); - if (error) - return error; - - /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ - if (state != RFKILL_STATE_UNBLOCKED && - state != RFKILL_STATE_SOFT_BLOCKED) - return -EINVAL; - - error = mutex_lock_killable(&rfkill->mutex); - if (error) - return error; - - if (!rfkill_epo_lock_active) - error = rfkill_toggle_radio(rfkill, state, 0); - else - error = -EPERM; - - mutex_unlock(&rfkill->mutex); - - return error ? error : count; -} - -static ssize_t rfkill_claim_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", 0); -} - -static ssize_t rfkill_claim_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - return -EOPNOTSUPP; -} - -static struct device_attribute rfkill_dev_attrs[] = { - __ATTR(name, S_IRUGO, rfkill_name_show, NULL), - __ATTR(type, S_IRUGO, rfkill_type_show, NULL), - __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), - __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), - __ATTR_NULL -}; - -static void rfkill_release(struct device *dev) -{ - struct rfkill *rfkill = to_rfkill(dev); - - kfree(rfkill); - module_put(THIS_MODULE); -} - -#ifdef CONFIG_PM -static int rfkill_suspend(struct device *dev, pm_message_t state) -{ - struct rfkill *rfkill = to_rfkill(dev); - - /* mark class device as suspended */ - if (dev->power.power_state.event != state.event) - dev->power.power_state = state; - - /* store state for the resume handler */ - rfkill->state_for_resume = rfkill->state; - - return 0; -} - -static int rfkill_resume(struct device *dev) -{ - struct rfkill *rfkill = to_rfkill(dev); - enum rfkill_state newstate; - - if (dev->power.power_state.event != PM_EVENT_ON) { - mutex_lock(&rfkill->mutex); - - dev->power.power_state.event = PM_EVENT_ON; - - /* - * rfkill->state could have been modified before we got - * called, and won't be updated by rfkill_toggle_radio() - * in force mode. Sync it FIRST. - */ - if (rfkill->get_state && - !rfkill->get_state(rfkill->data, &newstate)) - rfkill->state = newstate; - - /* - * If we are under EPO, kick transmitter offline, - * otherwise restore to pre-suspend state. - * - * Issue a notification in any case - */ - rfkill_toggle_radio(rfkill, - rfkill_epo_lock_active ? - RFKILL_STATE_SOFT_BLOCKED : - rfkill->state_for_resume, - 1); - - mutex_unlock(&rfkill->mutex); - rfkill_led_trigger(rfkill, rfkill->state); - } - - return 0; -} -#else -#define rfkill_suspend NULL -#define rfkill_resume NULL -#endif - -static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - struct rfkill *rfkill = to_rfkill(dev); - int error; - - error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); - if (error) - return error; - error = add_uevent_var(env, "RFKILL_TYPE=%s", - rfkill_get_type_str(rfkill->type)); - if (error) - return error; - error = add_uevent_var(env, "RFKILL_STATE=%d", rfkill->state); - return error; -} - -static struct class rfkill_class = { - .name = "rfkill", - .dev_release = rfkill_release, - .dev_attrs = rfkill_dev_attrs, - .suspend = rfkill_suspend, - .resume = rfkill_resume, - .dev_uevent = rfkill_dev_uevent, -}; - -static int rfkill_check_duplicity(const struct rfkill *rfkill) -{ - struct rfkill *p; - unsigned long seen[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; - - memset(seen, 0, sizeof(seen)); - - list_for_each_entry(p, &rfkill_list, node) { - if (WARN((p == rfkill), KERN_WARNING - "rfkill: illegal attempt to register " - "an already registered rfkill struct\n")) - return -EEXIST; - set_bit(p->type, seen); - } - - /* 0: first switch of its kind */ - return (test_bit(rfkill->type, seen)) ? 1 : 0; -} - -static int rfkill_add_switch(struct rfkill *rfkill) -{ - int error; - - mutex_lock(&rfkill_global_mutex); - - error = rfkill_check_duplicity(rfkill); - if (error < 0) - goto unlock_out; - - if (!error) { - /* lock default after first use */ - set_bit(rfkill->type, rfkill_states_lockdflt); - rfkill_global_states[rfkill->type].current_state = - rfkill_global_states[rfkill->type].default_state; - } - - rfkill_toggle_radio(rfkill, - rfkill_global_states[rfkill->type].current_state, - 0); - - list_add_tail(&rfkill->node, &rfkill_list); - - error = 0; -unlock_out: - mutex_unlock(&rfkill_global_mutex); - - return error; -} - -static void rfkill_remove_switch(struct rfkill *rfkill) -{ - mutex_lock(&rfkill_global_mutex); - list_del_init(&rfkill->node); - mutex_unlock(&rfkill_global_mutex); - - mutex_lock(&rfkill->mutex); - rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); - mutex_unlock(&rfkill->mutex); -} - -/** - * rfkill_allocate - allocate memory for rfkill structure. - * @parent: device that has rf switch on it - * @type: type of the switch (RFKILL_TYPE_*) - * - * This function should be called by the network driver when it needs - * rfkill structure. Once the structure is allocated the driver should - * finish its initialization by setting the name, private data, enable_radio - * and disable_radio methods and then register it with rfkill_register(). - * - * NOTE: If registration fails the structure shoudl be freed by calling - * rfkill_free() otherwise rfkill_unregister() should be used. - */ -struct rfkill * __must_check rfkill_allocate(struct device *parent, - enum rfkill_type type) -{ - struct rfkill *rfkill; - struct device *dev; - - if (WARN((type >= RFKILL_TYPE_MAX), - KERN_WARNING - "rfkill: illegal type %d passed as parameter " - "to rfkill_allocate\n", type)) - return NULL; - - rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL); - if (!rfkill) - return NULL; - - mutex_init(&rfkill->mutex); - INIT_LIST_HEAD(&rfkill->node); - rfkill->type = type; - - dev = &rfkill->dev; - dev->class = &rfkill_class; - dev->parent = parent; - device_initialize(dev); - - __module_get(THIS_MODULE); - - return rfkill; -} -EXPORT_SYMBOL(rfkill_allocate); - -/** - * rfkill_free - Mark rfkill structure for deletion - * @rfkill: rfkill structure to be destroyed - * - * Decrements reference count of the rfkill structure so it is destroyed. - * Note that rfkill_free() should _not_ be called after rfkill_unregister(). - */ -void rfkill_free(struct rfkill *rfkill) -{ - if (rfkill) - put_device(&rfkill->dev); -} -EXPORT_SYMBOL(rfkill_free); - -static void rfkill_led_trigger_register(struct rfkill *rfkill) -{ -#ifdef CONFIG_RFKILL_LEDS - int error; - - if (!rfkill->led_trigger.name) - rfkill->led_trigger.name = dev_name(&rfkill->dev); - if (!rfkill->led_trigger.activate) - rfkill->led_trigger.activate = rfkill_led_trigger_activate; - error = led_trigger_register(&rfkill->led_trigger); - if (error) - rfkill->led_trigger.name = NULL; -#endif /* CONFIG_RFKILL_LEDS */ -} - -static void rfkill_led_trigger_unregister(struct rfkill *rfkill) -{ -#ifdef CONFIG_RFKILL_LEDS - if (rfkill->led_trigger.name) { - led_trigger_unregister(&rfkill->led_trigger); - rfkill->led_trigger.name = NULL; - } -#endif -} - -/** - * rfkill_register - Register a rfkill structure. - * @rfkill: rfkill structure to be registered - * - * This function should be called by the network driver when the rfkill - * structure needs to be registered. Immediately from registration the - * switch driver should be able to service calls to toggle_radio. - */ -int __must_check rfkill_register(struct rfkill *rfkill) -{ - static atomic_t rfkill_no = ATOMIC_INIT(0); - struct device *dev = &rfkill->dev; - int error; - - if (WARN((!rfkill || !rfkill->toggle_radio || - rfkill->type >= RFKILL_TYPE_MAX || - rfkill->state >= RFKILL_STATE_MAX), - KERN_WARNING - "rfkill: attempt to register a " - "badly initialized rfkill struct\n")) - return -EINVAL; - - dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); - - rfkill_led_trigger_register(rfkill); - - error = rfkill_add_switch(rfkill); - if (error) { - rfkill_led_trigger_unregister(rfkill); - return error; - } - - error = device_add(dev); - if (error) { - rfkill_remove_switch(rfkill); - rfkill_led_trigger_unregister(rfkill); - return error; - } - - return 0; -} -EXPORT_SYMBOL(rfkill_register); - -/** - * rfkill_unregister - Unregister a rfkill structure. - * @rfkill: rfkill structure to be unregistered - * - * This function should be called by the network driver during device - * teardown to destroy rfkill structure. Note that rfkill_free() should - * _not_ be called after rfkill_unregister(). - */ -void rfkill_unregister(struct rfkill *rfkill) -{ - BUG_ON(!rfkill); - device_del(&rfkill->dev); - rfkill_remove_switch(rfkill); - rfkill_led_trigger_unregister(rfkill); - put_device(&rfkill->dev); -} -EXPORT_SYMBOL(rfkill_unregister); - -/** - * rfkill_set_default - set initial value for a switch type - * @type - the type of switch to set the default state of - * @state - the new default state for that group of switches - * - * Sets the initial state rfkill should use for a given type. - * The following initial states are allowed: RFKILL_STATE_SOFT_BLOCKED - * and RFKILL_STATE_UNBLOCKED. - * - * This function is meant to be used by platform drivers for platforms - * that can save switch state across power down/reboot. - * - * The default state for each switch type can be changed exactly once. - * After a switch of that type is registered, the default state cannot - * be changed anymore. This guards against multiple drivers it the - * same platform trying to set the initial switch default state, which - * is not allowed. - * - * Returns -EPERM if the state has already been set once or is in use, - * so drivers likely want to either ignore or at most printk(KERN_NOTICE) - * if this function returns -EPERM. - * - * Returns 0 if the new default state was set, or an error if it - * could not be set. - */ -int rfkill_set_default(enum rfkill_type type, enum rfkill_state state) -{ - int error; - - if (WARN((type >= RFKILL_TYPE_MAX || - (state != RFKILL_STATE_SOFT_BLOCKED && - state != RFKILL_STATE_UNBLOCKED)), - KERN_WARNING - "rfkill: illegal state %d or type %d passed as " - "parameter to rfkill_set_default\n", state, type)) - return -EINVAL; - - mutex_lock(&rfkill_global_mutex); - - if (!test_and_set_bit(type, rfkill_states_lockdflt)) { - rfkill_global_states[type].default_state = state; - rfkill_global_states[type].current_state = state; - error = 0; - } else - error = -EPERM; - - mutex_unlock(&rfkill_global_mutex); - return error; -} -EXPORT_SYMBOL_GPL(rfkill_set_default); - -/* - * Rfkill module initialization/deinitialization. - */ -static int __init rfkill_init(void) -{ - int error; - int i; - - /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ - if (rfkill_default_state != RFKILL_STATE_SOFT_BLOCKED && - rfkill_default_state != RFKILL_STATE_UNBLOCKED) - return -EINVAL; - - for (i = 0; i < RFKILL_TYPE_MAX; i++) - rfkill_global_states[i].default_state = rfkill_default_state; - - error = class_register(&rfkill_class); - if (error) { - printk(KERN_ERR "rfkill: unable to register rfkill class\n"); - return error; - } - - return 0; -} - -static void __exit rfkill_exit(void) -{ - class_unregister(&rfkill_class); -} - -subsys_initcall(rfkill_init); -module_exit(rfkill_exit); diff --git a/net/rfkill/rfkill.h b/net/rfkill/rfkill.h new file mode 100644 index 00000000000..d1117cb6e4d --- /dev/null +++ b/net/rfkill/rfkill.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2007 Ivo van Doorn + * Copyright 2009 Johannes Berg + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __RFKILL_INPUT_H +#define __RFKILL_INPUT_H + +/* core code */ +void rfkill_switch_all(const enum rfkill_type type, bool blocked); +void rfkill_epo(void); +void rfkill_restore_states(void); +void rfkill_remove_epo_lock(void); +bool rfkill_is_epo_lock_active(void); +bool rfkill_get_global_sw_state(const enum rfkill_type type); + +/* input handler */ +int rfkill_handler_init(void); +void rfkill_handler_exit(void); + +#endif /* __RFKILL_INPUT_H */ diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig index 1b46747a5f5..0bdbb692820 100644 --- a/net/wimax/Kconfig +++ b/net/wimax/Kconfig @@ -1,23 +1,9 @@ # # WiMAX LAN device configuration # -# Note the ugly 'depends on' on WIMAX: that disallows RFKILL to be a -# module if WIMAX is to be linked in. The WiMAX code is done in such a -# way that it doesn't require and explicit dependency on RFKILL in -# case an embedded system wants to rip it out. -# -# As well, enablement of the RFKILL code means we need the INPUT layer -# support to inject events coming from hw rfkill switches. That -# dependency could be killed if input.h provided appropriate means to -# work when input is disabled. - -comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" - depends on INPUT = n && RFKILL != n menuconfig WIMAX tristate "WiMAX Wireless Broadband support" - depends on (y && RFKILL != m) || m - depends on (INPUT && RFKILL != n) || RFKILL = n help Select to configure support for devices that provide diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index a3616e2ccb8..bb102e4aa3e 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -29,8 +29,8 @@ * A non-polled generic rfkill device is embedded into the WiMAX * subsystem's representation of a device. * - * FIXME: Need polled support? use a timer or add the implementation - * to the stack. + * FIXME: Need polled support? Let drivers provide a poll routine + * and hand it to rfkill ops then? * * All device drivers have to do is after wimax_dev_init(), call * wimax_report_rfkill_hw() and wimax_report_rfkill_sw() to update @@ -43,7 +43,7 @@ * wimax_rfkill() Kernel calling wimax_rfkill() * __wimax_rf_toggle_radio() * - * wimax_rfkill_toggle_radio() RF-Kill subsytem calling + * wimax_rfkill_set_radio_block() RF-Kill subsytem calling * __wimax_rf_toggle_radio() * * __wimax_rf_toggle_radio() @@ -65,15 +65,11 @@ #include #include #include -#include #include "wimax-internal.h" #define D_SUBMODULE op_rfkill #include "debug-levels.h" -#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) - - /** * wimax_report_rfkill_hw - Reports changes in the hardware RF switch * @@ -99,7 +95,6 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, int result; struct device *dev = wimax_dev_to_dev(wimax_dev); enum wimax_st wimax_state; - enum rfkill_state rfkill_state; d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); BUG_ON(state == WIMAX_RF_QUERY); @@ -112,16 +107,15 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, if (state != wimax_dev->rf_hw) { wimax_dev->rf_hw = state; - rfkill_state = state == WIMAX_RF_ON ? - RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; if (wimax_dev->rf_hw == WIMAX_RF_ON && wimax_dev->rf_sw == WIMAX_RF_ON) wimax_state = WIMAX_ST_READY; else wimax_state = WIMAX_ST_RADIO_OFF; + + rfkill_set_hw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF); + __wimax_state_change(wimax_dev, wimax_state); - input_report_key(wimax_dev->rfkill_input, KEY_WIMAX, - rfkill_state); } error_not_ready: mutex_unlock(&wimax_dev->mutex); @@ -174,6 +168,7 @@ void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev, else wimax_state = WIMAX_ST_RADIO_OFF; __wimax_state_change(wimax_dev, wimax_state); + rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF); } error_not_ready: mutex_unlock(&wimax_dev->mutex); @@ -249,36 +244,31 @@ out_no_change: * * NOTE: This call will block until the operation is completed. */ -static -int wimax_rfkill_toggle_radio(void *data, enum rfkill_state state) +static int wimax_rfkill_set_radio_block(void *data, bool blocked) { int result; struct wimax_dev *wimax_dev = data; struct device *dev = wimax_dev_to_dev(wimax_dev); enum wimax_rf_state rf_state; - d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); - switch (state) { - case RFKILL_STATE_SOFT_BLOCKED: + d_fnstart(3, dev, "(wimax_dev %p blocked %u)\n", wimax_dev, blocked); + rf_state = WIMAX_RF_ON; + if (blocked) rf_state = WIMAX_RF_OFF; - break; - case RFKILL_STATE_UNBLOCKED: - rf_state = WIMAX_RF_ON; - break; - default: - BUG(); - } mutex_lock(&wimax_dev->mutex); if (wimax_dev->state <= __WIMAX_ST_QUIESCING) - result = 0; /* just pretend it didn't happen */ + result = 0; else result = __wimax_rf_toggle_radio(wimax_dev, rf_state); mutex_unlock(&wimax_dev->mutex); - d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n", - wimax_dev, state, result); + d_fnend(3, dev, "(wimax_dev %p blocked %u) = %d\n", + wimax_dev, blocked, result); return result; } +static const struct rfkill_ops wimax_rfkill_ops = { + .set_block = wimax_rfkill_set_radio_block, +}; /** * wimax_rfkill - Set the software RF switch state for a WiMAX device @@ -322,6 +312,7 @@ int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state) result = __wimax_rf_toggle_radio(wimax_dev, state); if (result < 0) goto error; + rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF); break; case WIMAX_RF_QUERY: break; @@ -349,40 +340,20 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev) { int result; struct rfkill *rfkill; - struct input_dev *input_dev; struct device *dev = wimax_dev_to_dev(wimax_dev); d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev); /* Initialize RF Kill */ result = -ENOMEM; - rfkill = rfkill_allocate(dev, RFKILL_TYPE_WIMAX); + rfkill = rfkill_alloc(wimax_dev->name, dev, RFKILL_TYPE_WIMAX, + &wimax_rfkill_ops, wimax_dev); if (rfkill == NULL) goto error_rfkill_allocate; + + d_printf(1, dev, "rfkill %p\n", rfkill); + wimax_dev->rfkill = rfkill; - rfkill->name = wimax_dev->name; - rfkill->state = RFKILL_STATE_UNBLOCKED; - rfkill->data = wimax_dev; - rfkill->toggle_radio = wimax_rfkill_toggle_radio; - - /* Initialize the input device for the hw key */ - input_dev = input_allocate_device(); - if (input_dev == NULL) - goto error_input_allocate; - wimax_dev->rfkill_input = input_dev; - d_printf(1, dev, "rfkill %p input %p\n", rfkill, input_dev); - - input_dev->name = wimax_dev->name; - /* FIXME: get a real device bus ID and stuff? do we care? */ - input_dev->id.bustype = BUS_HOST; - input_dev->id.vendor = 0xffff; - input_dev->evbit[0] = BIT(EV_KEY); - set_bit(KEY_WIMAX, input_dev->keybit); - - /* Register both */ - result = input_register_device(wimax_dev->rfkill_input); - if (result < 0) - goto error_input_register; result = rfkill_register(wimax_dev->rfkill); if (result < 0) goto error_rfkill_register; @@ -394,17 +365,8 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev) d_fnend(3, dev, "(wimax_dev %p) = 0\n", wimax_dev); return 0; - /* if rfkill_register() suceeds, can't use rfkill_free() any - * more, only rfkill_unregister() [it owns the refcount]; with - * the input device we have the same issue--hence the if. */ error_rfkill_register: - input_unregister_device(wimax_dev->rfkill_input); - wimax_dev->rfkill_input = NULL; -error_input_register: - if (wimax_dev->rfkill_input) - input_free_device(wimax_dev->rfkill_input); -error_input_allocate: - rfkill_free(wimax_dev->rfkill); + rfkill_destroy(wimax_dev->rfkill); error_rfkill_allocate: d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result); return result; @@ -423,45 +385,12 @@ void wimax_rfkill_rm(struct wimax_dev *wimax_dev) { struct device *dev = wimax_dev_to_dev(wimax_dev); d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev); - rfkill_unregister(wimax_dev->rfkill); /* frees */ - input_unregister_device(wimax_dev->rfkill_input); + rfkill_unregister(wimax_dev->rfkill); + rfkill_destroy(wimax_dev->rfkill); d_fnend(3, dev, "(wimax_dev %p)\n", wimax_dev); } -#else /* #ifdef CONFIG_RFKILL */ - -void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, - enum wimax_rf_state state) -{ -} -EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw); - -void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev, - enum wimax_rf_state state) -{ -} -EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw); - -int wimax_rfkill(struct wimax_dev *wimax_dev, - enum wimax_rf_state state) -{ - return WIMAX_RF_ON << 1 | WIMAX_RF_ON; -} -EXPORT_SYMBOL_GPL(wimax_rfkill); - -int wimax_rfkill_add(struct wimax_dev *wimax_dev) -{ - return 0; -} - -void wimax_rfkill_rm(struct wimax_dev *wimax_dev) -{ -} - -#endif /* #ifdef CONFIG_RFKILL */ - - /* * Exporting to user space over generic netlink * -- cgit v1.2.3-70-g09d2 From c64fb01627e24725d1f9d535e4426475a4415753 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2009 13:01:38 +0200 Subject: rfkill: create useful userspace interface The new code added by this patch will make rfkill create a misc character device /dev/rfkill that userspace can use to control rfkill soft blocks and get status of devices as well as events when the status changes. Using it is very simple -- when you open it you can read a number of times to get the initial state, and every further read blocks (you can poll) on getting the next event from the kernel. The same structure you read is also used when writing to it to change the soft block of a given device, all devices of a given type, or all devices. This also makes CONFIG_RFKILL_INPUT selectable again in order to be able to test without it present since its functionality can now be replaced by userspace entirely and distros and users may not want the input part of rfkill interfering with their userspace code. We will also write a userspace daemon to handle all that and consequently add the input code to the feature removal schedule. In order to have rfkilld support both kernels with and without CONFIG_RFKILL_INPUT (or new kernels after its eventual removal) we also add an ioctl (that only exists if rfkill-input is present) to disable rfkill-input. It is not very efficient, but at least gives the correct behaviour in all cases. Signed-off-by: Johannes Berg Acked-by: Marcel Holtmann Signed-off-by: John W. Linville --- Documentation/feature-removal-schedule.txt | 7 + include/linux/rfkill.h | 84 +++++--- net/rfkill/Kconfig | 4 +- net/rfkill/core.c | 330 ++++++++++++++++++++++++++++- 4 files changed, 394 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index de491a3e231..edb2f0b0761 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -437,3 +437,10 @@ Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate driver but this caused driver conflicts. Who: Jean Delvare Krzysztof Helt + +--------------------------- + +What: CONFIG_RFKILL_INPUT +When: 2.6.33 +Why: Should be implemented in userspace, policy daemon. +Who: Johannes Berg diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 090852c8de7..7c116f6631b 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -22,34 +22,17 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include /* define userspace visible states */ #define RFKILL_STATE_SOFT_BLOCKED 0 #define RFKILL_STATE_UNBLOCKED 1 #define RFKILL_STATE_HARD_BLOCKED 2 -/* and that's all userspace gets */ -#ifdef __KERNEL__ -/* don't allow anyone to use these in the kernel */ -enum rfkill_user_states { - RFKILL_USER_STATE_SOFT_BLOCKED = RFKILL_STATE_SOFT_BLOCKED, - RFKILL_USER_STATE_UNBLOCKED = RFKILL_STATE_UNBLOCKED, - RFKILL_USER_STATE_HARD_BLOCKED = RFKILL_STATE_HARD_BLOCKED, -}; -#undef RFKILL_STATE_SOFT_BLOCKED -#undef RFKILL_STATE_UNBLOCKED -#undef RFKILL_STATE_HARD_BLOCKED - -#include -#include -#include -#include -#include -#include - /** * enum rfkill_type - type of rfkill switch. * + * @RFKILL_TYPE_ALL: toggles all switches (userspace only) * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. * @RFKILL_TYPE_UWB: switch is on a ultra wideband device. @@ -58,6 +41,7 @@ enum rfkill_user_states { * @NUM_RFKILL_TYPES: number of defined rfkill types */ enum rfkill_type { + RFKILL_TYPE_ALL = 0, RFKILL_TYPE_WLAN, RFKILL_TYPE_BLUETOOTH, RFKILL_TYPE_UWB, @@ -66,6 +50,62 @@ enum rfkill_type { NUM_RFKILL_TYPES, }; +/** + * enum rfkill_operation - operation types + * @RFKILL_OP_ADD: a device was added + * @RFKILL_OP_DEL: a device was removed + * @RFKILL_OP_CHANGE: a device's state changed -- userspace changes one device + * @RFKILL_OP_CHANGE_ALL: userspace changes all devices (of a type, or all) + */ +enum rfkill_operation { + RFKILL_OP_ADD = 0, + RFKILL_OP_DEL, + RFKILL_OP_CHANGE, + RFKILL_OP_CHANGE_ALL, +}; + +/** + * struct rfkill_event - events for userspace on /dev/rfkill + * @idx: index of dev rfkill + * @type: type of the rfkill struct + * @op: operation code + * @hard: hard state (0/1) + * @soft: soft state (0/1) + * + * Structure used for userspace communication on /dev/rfkill, + * used for events from the kernel and control to the kernel. + */ +struct rfkill_event { + __u32 idx; + __u8 type; + __u8 op; + __u8 soft, hard; +} __packed; + +/* ioctl for turning off rfkill-input (if present) */ +#define RFKILL_IOC_MAGIC 'R' +#define RFKILL_IOC_NOINPUT 1 +#define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) + +/* and that's all userspace gets */ +#ifdef __KERNEL__ +/* don't allow anyone to use these in the kernel */ +enum rfkill_user_states { + RFKILL_USER_STATE_SOFT_BLOCKED = RFKILL_STATE_SOFT_BLOCKED, + RFKILL_USER_STATE_UNBLOCKED = RFKILL_STATE_UNBLOCKED, + RFKILL_USER_STATE_HARD_BLOCKED = RFKILL_STATE_HARD_BLOCKED, +}; +#undef RFKILL_STATE_SOFT_BLOCKED +#undef RFKILL_STATE_UNBLOCKED +#undef RFKILL_STATE_HARD_BLOCKED + +#include +#include +#include +#include +#include +#include + /* this is opaque */ struct rfkill; @@ -84,11 +124,7 @@ struct rfkill; * the rfkill core query your driver before setting a requested * block. * @set_block: turn the transmitter on (blocked == false) or off - * (blocked == true) -- this is called only while the transmitter - * is not hard-blocked, but note that the core's view of whether - * the transmitter is hard-blocked might differ from your driver's - * view due to race conditions, so it is possible that it is still - * called at the same time as you are calling rfkill_set_hw_state(). + * (blocked == true) -- ignore and return 0 when hard blocked. * This callback must be assigned. */ struct rfkill_ops { diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index b47f72fae05..fd7600d8ab1 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -18,7 +18,7 @@ config RFKILL_LEDS default y config RFKILL_INPUT - bool + bool "RF switch input support" depends on RFKILL depends on INPUT = y || RFKILL = INPUT - default y + default y if !EMBEDDED diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 30a6f8d819b..2230aa6b14f 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -28,6 +28,10 @@ #include #include #include +#include +#include +#include +#include #include "rfkill.h" @@ -49,6 +53,8 @@ struct rfkill { unsigned long state; + u32 idx; + bool registered; bool suspended; @@ -69,6 +75,18 @@ struct rfkill { }; #define to_rfkill(d) container_of(d, struct rfkill, dev) +struct rfkill_int_event { + struct list_head list; + struct rfkill_event ev; +}; + +struct rfkill_data { + struct list_head list; + struct list_head events; + struct mutex mtx; + wait_queue_head_t read_wait; + bool input_handler; +}; MODULE_AUTHOR("Ivo van Doorn "); @@ -90,6 +108,7 @@ MODULE_LICENSE("GPL"); */ static LIST_HEAD(rfkill_list); /* list of registered rf switches */ static DEFINE_MUTEX(rfkill_global_mutex); +static LIST_HEAD(rfkill_fds); /* list of open fds of /dev/rfkill */ static unsigned int rfkill_default_state = 1; module_param_named(default_state, rfkill_default_state, uint, 0444); @@ -171,12 +190,48 @@ static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill) } #endif /* CONFIG_RFKILL_LEDS */ -static void rfkill_uevent(struct rfkill *rfkill) +static void rfkill_fill_event(struct rfkill_event *ev, struct rfkill *rfkill, + enum rfkill_operation op) +{ + unsigned long flags; + + ev->idx = rfkill->idx; + ev->type = rfkill->type; + ev->op = op; + + spin_lock_irqsave(&rfkill->lock, flags); + ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW); + ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW | + RFKILL_BLOCK_SW_PREV)); + spin_unlock_irqrestore(&rfkill->lock, flags); +} + +static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) +{ + struct rfkill_data *data; + struct rfkill_int_event *ev; + + list_for_each_entry(data, &rfkill_fds, list) { + ev = kzalloc(sizeof(*ev), GFP_KERNEL); + if (!ev) + continue; + rfkill_fill_event(&ev->ev, rfkill, op); + mutex_lock(&data->mtx); + list_add_tail(&ev->list, &data->events); + mutex_unlock(&data->mtx); + wake_up_interruptible(&data->read_wait); + } +} + +static void rfkill_event(struct rfkill *rfkill) { if (!rfkill->registered || rfkill->suspended) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); + + /* also send event to /dev/rfkill */ + rfkill_send_events(rfkill, RFKILL_OP_CHANGE); } static bool __rfkill_set_hw_state(struct rfkill *rfkill, @@ -260,9 +315,12 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); - rfkill_uevent(rfkill); + rfkill_event(rfkill); } +#ifdef CONFIG_RFKILL_INPUT +static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); + /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected @@ -299,6 +357,9 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) */ void rfkill_switch_all(enum rfkill_type type, bool blocked) { + if (atomic_read(&rfkill_input_disabled)) + return; + mutex_lock(&rfkill_global_mutex); if (!rfkill_epo_lock_active) @@ -321,6 +382,9 @@ void rfkill_epo(void) struct rfkill *rfkill; int i; + if (atomic_read(&rfkill_input_disabled)) + return; + mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = true; @@ -331,6 +395,7 @@ void rfkill_epo(void) rfkill_global_states[i].def = rfkill_global_states[i].cur; rfkill_global_states[i].cur = true; } + mutex_unlock(&rfkill_global_mutex); } @@ -345,6 +410,9 @@ void rfkill_restore_states(void) { int i; + if (atomic_read(&rfkill_input_disabled)) + return; + mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; @@ -361,6 +429,9 @@ void rfkill_restore_states(void) */ void rfkill_remove_epo_lock(void) { + if (atomic_read(&rfkill_input_disabled)) + return; + mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; mutex_unlock(&rfkill_global_mutex); @@ -391,9 +462,12 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type) { return rfkill_global_states[type].cur; } +#endif void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked) { + BUG_ON(type == RFKILL_TYPE_ALL); + mutex_lock(&rfkill_global_mutex); /* don't allow unblock when epo */ @@ -537,6 +611,15 @@ static ssize_t rfkill_type_show(struct device *dev, return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); } +static ssize_t rfkill_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%d\n", rfkill->idx); +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -594,6 +677,7 @@ static ssize_t rfkill_claim_store(struct device *dev, static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), + __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL @@ -708,7 +792,7 @@ struct rfkill * __must_check rfkill_alloc(const char *name, if (WARN_ON(!name)) return NULL; - if (WARN_ON(type >= NUM_RFKILL_TYPES)) + if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES)) return NULL; rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); @@ -754,7 +838,9 @@ static void rfkill_uevent_work(struct work_struct *work) rfkill = container_of(work, struct rfkill, uevent_work); - rfkill_uevent(rfkill); + mutex_lock(&rfkill_global_mutex); + rfkill_event(rfkill); + mutex_unlock(&rfkill_global_mutex); } static void rfkill_sync_work(struct work_struct *work) @@ -785,6 +871,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) goto unlock; } + rfkill->idx = rfkill_no; dev_set_name(dev, "rfkill%lu", rfkill_no); rfkill_no++; @@ -819,6 +906,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) INIT_WORK(&rfkill->sync_work, rfkill_sync_work); schedule_work(&rfkill->sync_work); + rfkill_send_events(rfkill, RFKILL_OP_ADD); mutex_unlock(&rfkill_global_mutex); return 0; @@ -848,6 +936,7 @@ void rfkill_unregister(struct rfkill *rfkill) device_del(&rfkill->dev); mutex_lock(&rfkill_global_mutex); + rfkill_send_events(rfkill, RFKILL_OP_DEL); list_del_init(&rfkill->node); mutex_unlock(&rfkill_global_mutex); @@ -862,6 +951,227 @@ void rfkill_destroy(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_destroy); +static int rfkill_fop_open(struct inode *inode, struct file *file) +{ + struct rfkill_data *data; + struct rfkill *rfkill; + struct rfkill_int_event *ev, *tmp; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + INIT_LIST_HEAD(&data->events); + mutex_init(&data->mtx); + init_waitqueue_head(&data->read_wait); + + mutex_lock(&rfkill_global_mutex); + mutex_lock(&data->mtx); + /* + * start getting events from elsewhere but hold mtx to get + * startup events added first + */ + list_add(&data->list, &rfkill_fds); + + list_for_each_entry(rfkill, &rfkill_list, node) { + ev = kzalloc(sizeof(*ev), GFP_KERNEL); + if (!ev) + goto free; + rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); + list_add_tail(&ev->list, &data->events); + } + mutex_unlock(&data->mtx); + mutex_unlock(&rfkill_global_mutex); + + file->private_data = data; + + return nonseekable_open(inode, file); + + free: + mutex_unlock(&data->mtx); + mutex_unlock(&rfkill_global_mutex); + mutex_destroy(&data->mtx); + list_for_each_entry_safe(ev, tmp, &data->events, list) + kfree(ev); + kfree(data); + return -ENOMEM; +} + +static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) +{ + struct rfkill_data *data = file->private_data; + unsigned int res = POLLOUT | POLLWRNORM; + + poll_wait(file, &data->read_wait, wait); + + mutex_lock(&data->mtx); + if (!list_empty(&data->events)) + res = POLLIN | POLLRDNORM; + mutex_unlock(&data->mtx); + + return res; +} + +static bool rfkill_readable(struct rfkill_data *data) +{ + bool r; + + mutex_lock(&data->mtx); + r = !list_empty(&data->events); + mutex_unlock(&data->mtx); + + return r; +} + +static ssize_t rfkill_fop_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct rfkill_data *data = file->private_data; + struct rfkill_int_event *ev; + unsigned long sz; + int ret; + + mutex_lock(&data->mtx); + + while (list_empty(&data->events)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + goto out; + } + mutex_unlock(&data->mtx); + ret = wait_event_interruptible(data->read_wait, + rfkill_readable(data)); + mutex_lock(&data->mtx); + + if (ret) + goto out; + } + + ev = list_first_entry(&data->events, struct rfkill_int_event, + list); + + sz = min_t(unsigned long, sizeof(ev->ev), count); + ret = sz; + if (copy_to_user(buf, &ev->ev, sz)) + ret = -EFAULT; + + list_del(&ev->list); + kfree(ev); + out: + mutex_unlock(&data->mtx); + return ret; +} + +static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct rfkill *rfkill; + struct rfkill_event ev; + + /* we don't need the 'hard' variable but accept it */ + if (count < sizeof(ev) - 1) + return -EINVAL; + + if (copy_from_user(&ev, buf, sizeof(ev) - 1)) + return -EFAULT; + + if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL) + return -EINVAL; + + if (ev.type >= NUM_RFKILL_TYPES) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + + if (ev.op == RFKILL_OP_CHANGE_ALL) { + if (ev.type == RFKILL_TYPE_ALL) { + enum rfkill_type i; + for (i = 0; i < NUM_RFKILL_TYPES; i++) + rfkill_global_states[i].cur = ev.soft; + } else { + rfkill_global_states[ev.type].cur = ev.soft; + } + } + + list_for_each_entry(rfkill, &rfkill_list, node) { + if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL) + continue; + + if (rfkill->type != ev.type && ev.type != RFKILL_TYPE_ALL) + continue; + + rfkill_set_block(rfkill, ev.soft); + } + mutex_unlock(&rfkill_global_mutex); + + return count; +} + +static int rfkill_fop_release(struct inode *inode, struct file *file) +{ + struct rfkill_data *data = file->private_data; + struct rfkill_int_event *ev, *tmp; + + mutex_lock(&rfkill_global_mutex); + list_del(&data->list); + mutex_unlock(&rfkill_global_mutex); + + mutex_destroy(&data->mtx); + list_for_each_entry_safe(ev, tmp, &data->events, list) + kfree(ev); + +#ifdef CONFIG_RFKILL_INPUT + if (data->input_handler) + atomic_dec(&rfkill_input_disabled); +#endif + + kfree(data); + + return 0; +} + +#ifdef CONFIG_RFKILL_INPUT +static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct rfkill_data *data = file->private_data; + + if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC) + return -ENOSYS; + + if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT) + return -ENOSYS; + + mutex_lock(&data->mtx); + + if (!data->input_handler) { + atomic_inc(&rfkill_input_disabled); + data->input_handler = true; + } + + mutex_unlock(&data->mtx); + + return 0; +} +#endif + +static const struct file_operations rfkill_fops = { + .open = rfkill_fop_open, + .read = rfkill_fop_read, + .write = rfkill_fop_write, + .poll = rfkill_fop_poll, + .release = rfkill_fop_release, +#ifdef CONFIG_RFKILL_INPUT + .unlocked_ioctl = rfkill_fop_ioctl, + .compat_ioctl = rfkill_fop_ioctl, +#endif +}; + +static struct miscdevice rfkill_miscdev = { + .name = "rfkill", + .fops = &rfkill_fops, + .minor = MISC_DYNAMIC_MINOR, +}; static int __init rfkill_init(void) { @@ -875,10 +1185,19 @@ static int __init rfkill_init(void) if (error) goto out; + error = misc_register(&rfkill_miscdev); + if (error) { + class_unregister(&rfkill_class); + goto out; + } + #ifdef CONFIG_RFKILL_INPUT error = rfkill_handler_init(); - if (error) + if (error) { + misc_deregister(&rfkill_miscdev); class_unregister(&rfkill_class); + goto out; + } #endif out: @@ -891,6 +1210,7 @@ static void __exit rfkill_exit(void) #ifdef CONFIG_RFKILL_INPUT rfkill_handler_exit(); #endif + misc_deregister(&rfkill_miscdev); class_unregister(&rfkill_class); } module_exit(rfkill_exit); -- cgit v1.2.3-70-g09d2 From 7643a2c3fcc13cd6fbd731f214463547383418ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2009 13:01:39 +0200 Subject: cfg80211: move txpower wext from mac80211 This patch introduces new cfg80211 API to set the TX power via cfg80211, puts the wext code into cfg80211 and updates mac80211 to use all that. The -ENETDOWN bits are a hack but will go away soon. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 30 +++++++++++++++++ net/mac80211/cfg.c | 54 +++++++++++++++++++++++++++++++ net/mac80211/wext.c | 80 ++-------------------------------------------- net/wireless/wext-compat.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f20da7d63b1..8b8e4b89362 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -751,6 +751,21 @@ enum wiphy_params_flags { WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, }; +/** + * enum tx_power_setting - TX power adjustment + * + * @TX_POWER_AUTOMATIC: the dbm parameter is ignored + * @TX_POWER_LIMITED: limit TX power by the dbm parameter + * @TX_POWER_FIXED: fix TX power to the dbm parameter + * @TX_POWER_OFF: turn off completely (will go away) + */ +enum tx_power_setting { + TX_POWER_AUTOMATIC, + TX_POWER_LIMITED, + TX_POWER_FIXED, + TX_POWER_OFF, +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -837,6 +852,11 @@ enum wiphy_params_flags { * @changed bitfield (see &enum wiphy_params_flags) describes which values * have changed. The actual parameter values are available in * struct wiphy. If returning an error, no value should be changed. + * + * @set_tx_power: set the transmit power according to the parameters + * @get_tx_power: store the current TX power into the dbm variable; + * return 0 if successful; or -ENETDOWN if successful but power + * is disabled (this will go away) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -928,6 +948,10 @@ struct cfg80211_ops { int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); + + int (*set_tx_power)(struct wiphy *wiphy, + enum tx_power_setting type, int dbm); + int (*get_tx_power)(struct wiphy *wiphy, int *dbm); }; /* @@ -1451,6 +1475,12 @@ int cfg80211_wext_siwencode(struct net_device *dev, int cfg80211_wext_giwencode(struct net_device *dev, struct iw_request_info *info, struct iw_point *erq, char *keybuf); +int cfg80211_wext_siwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *keybuf); +int cfg80211_wext_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *keybuf); /* * callbacks for asynchronous cfg80211 methods, notification diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d2fc18c1ae0..81258acf48b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1334,6 +1334,58 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) return 0; } +static int ieee80211_set_tx_power(struct wiphy *wiphy, + enum tx_power_setting type, int dbm) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_channel *chan = local->hw.conf.channel; + u32 changes = 0; + bool radio_enabled = true; + + switch (type) { + case TX_POWER_AUTOMATIC: + local->user_power_level = -1; + break; + case TX_POWER_LIMITED: + if (dbm < 0) + return -EINVAL; + local->user_power_level = dbm; + break; + case TX_POWER_FIXED: + if (dbm < 0) + return -EINVAL; + /* TODO: move to cfg80211 when it knows the channel */ + if (dbm > chan->max_power) + return -EINVAL; + local->user_power_level = dbm; + break; + case TX_POWER_OFF: + radio_enabled = false; + break; + } + + if (radio_enabled != local->hw.conf.radio_enabled) { + changes |= IEEE80211_CONF_CHANGE_RADIO_ENABLED; + local->hw.conf.radio_enabled = radio_enabled; + } + + ieee80211_hw_config(local, changes); + + return 0; +} + +static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + *dbm = local->hw.conf.power_level; + + if (!local->hw.conf.radio_enabled) + return -ENETDOWN; + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1373,4 +1425,6 @@ struct cfg80211_ops mac80211_config_ops = { .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, .set_wiphy_params = ieee80211_set_wiphy_params, + .set_tx_power = ieee80211_set_tx_power, + .get_tx_power = ieee80211_get_tx_power, }; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index a01154e127f..d2d81b10334 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -306,82 +306,6 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwtxpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *data, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_channel* chan = local->hw.conf.channel; - bool reconf = false; - u32 reconf_flags = 0; - int new_power_level; - - if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) - return -EINVAL; - if (data->txpower.flags & IW_TXPOW_RANGE) - return -EINVAL; - if (!chan) - return -EINVAL; - - /* only change when not disabling */ - if (!data->txpower.disabled) { - if (data->txpower.fixed) { - if (data->txpower.value < 0) - return -EINVAL; - new_power_level = data->txpower.value; - /* - * Debatable, but we cannot do a fixed power - * level above the regulatory constraint. - * Use "iwconfig wlan0 txpower 15dBm" instead. - */ - if (new_power_level > chan->max_power) - return -EINVAL; - } else { - /* - * Automatic power level setting, max being the value - * passed in from userland. - */ - if (data->txpower.value < 0) - new_power_level = -1; - else - new_power_level = data->txpower.value; - } - - reconf = true; - - /* - * ieee80211_hw_config() will limit to the channel's - * max power and possibly power constraint from AP. - */ - local->user_power_level = new_power_level; - } - - if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { - local->hw.conf.radio_enabled = !(data->txpower.disabled); - reconf_flags |= IEEE80211_CONF_CHANGE_RADIO_ENABLED; - ieee80211_led_radio(local, local->hw.conf.radio_enabled); - } - - if (reconf || reconf_flags) - ieee80211_hw_config(local, reconf_flags); - - return 0; -} - -static int ieee80211_ioctl_giwtxpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *data, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - data->txpower.fixed = 1; - data->txpower.disabled = !(local->hw.conf.radio_enabled); - data->txpower.value = local->hw.conf.power_level; - data->txpower.flags = IW_TXPOW_DBM; - - return 0; -} - static int ieee80211_ioctl_siwpower(struct net_device *dev, struct iw_request_info *info, struct iw_param *wrq, @@ -658,8 +582,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) cfg80211_wext_giwrts, /* SIOCGIWRTS */ (iw_handler) cfg80211_wext_siwfrag, /* SIOCSIWFRAG */ (iw_handler) cfg80211_wext_giwfrag, /* SIOCGIWFRAG */ - (iw_handler) ieee80211_ioctl_siwtxpower, /* SIOCSIWTXPOW */ - (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ + (iw_handler) cfg80211_wext_siwtxpower, /* SIOCSIWTXPOW */ + (iw_handler) cfg80211_wext_giwtxpower, /* SIOCGIWTXPOW */ (iw_handler) cfg80211_wext_siwretry, /* SIOCSIWRETRY */ (iw_handler) cfg80211_wext_giwretry, /* SIOCGIWRETRY */ (iw_handler) cfg80211_wext_siwencode, /* SIOCSIWENCODE */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 711e00a0c9b..9fbfb8536e7 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -744,3 +744,83 @@ int cfg80211_wext_giwencode(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); + +int cfg80211_wext_siwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + enum tx_power_setting type; + int dbm = 0; + + if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) + return -EINVAL; + if (data->txpower.flags & IW_TXPOW_RANGE) + return -EINVAL; + + if (!rdev->ops->set_tx_power) + return -EOPNOTSUPP; + + /* only change when not disabling */ + if (!data->txpower.disabled) { + if (data->txpower.fixed) { + /* + * wext doesn't support negative values, see + * below where it's for automatic + */ + if (data->txpower.value < 0) + return -EINVAL; + dbm = data->txpower.value; + type = TX_POWER_FIXED; + /* TODO: do regulatory check! */ + } else { + /* + * Automatic power level setting, max being the value + * passed in from userland. + */ + if (data->txpower.value < 0) { + type = TX_POWER_AUTOMATIC; + } else { + dbm = data->txpower.value; + type = TX_POWER_LIMITED; + } + } + } else { + type = TX_POWER_OFF; + } + + return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower); + +int cfg80211_wext_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int err, val; + + if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) + return -EINVAL; + if (data->txpower.flags & IW_TXPOW_RANGE) + return -EINVAL; + + if (!rdev->ops->get_tx_power) + return -EOPNOTSUPP; + + err = rdev->ops->get_tx_power(wdev->wiphy, &val); + /* HACK!!! */ + if (err && err != -ENETDOWN) + return err; + + /* well... oh well */ + data->txpower.fixed = 1; + data->txpower.disabled = err == -ENETDOWN; + data->txpower.value = val; + data->txpower.flags = IW_TXPOW_DBM; + + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwtxpower); -- cgit v1.2.3-70-g09d2 From 6081162e2ed78dfcf149b076b047078ab1445cc2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2009 13:01:40 +0200 Subject: rfkill: add function to query state Sometimes it is necessary to know how the state is, and it is easier to query rfkill than keep track of it somewhere else, so add a function for that. This could later be expanded to return hard/soft block, but so far that isn't necessary. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill.h | 12 ++++++++++++ net/rfkill/core.c | 13 +++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 7c116f6631b..ee3eddea856 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -261,6 +261,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); * registered drivers? */ void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked); + +/** + * rfkill_blocked - query rfkill block + * + * @rfkill: rfkill struct to query + */ +bool rfkill_blocked(struct rfkill *rfkill); #else /* !RFKILL */ static inline struct rfkill * __must_check rfkill_alloc(const char *name, @@ -313,6 +320,11 @@ static inline void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked) { } + +static inline bool rfkill_blocked(struct rfkill *rfkill) +{ + return false; +} #endif /* RFKILL || RFKILL_MODULE */ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 2230aa6b14f..91e9168b544 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -773,6 +773,19 @@ static struct class rfkill_class = { .resume = rfkill_resume, }; +bool rfkill_blocked(struct rfkill *rfkill) +{ + unsigned long flags; + u32 state; + + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + + return !!(state & RFKILL_BLOCK_ANY); +} +EXPORT_SYMBOL(rfkill_blocked); + struct rfkill * __must_check rfkill_alloc(const char *name, struct device *parent, -- cgit v1.2.3-70-g09d2 From 1f87f7d3a3b42b20f34cb03f0fd1a41c3d0e27f3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jun 2009 13:01:41 +0200 Subject: cfg80211: add rfkill support To be easier on drivers and users, have cfg80211 register an rfkill structure that drivers can access. When soft-killed, simply take down all interfaces; when hard-killed the driver needs to notify us and we will take down the interfaces after the fact. While rfkilled, interfaces cannot be set UP. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/asm-generic/errno.h | 2 + include/net/cfg80211.h | 29 ++++++++++++-- include/net/mac80211.h | 20 ++++++++-- net/mac80211/cfg.c | 20 ++++------ net/mac80211/driver-ops.h | 7 ++++ net/mac80211/iface.c | 4 +- net/mac80211/util.c | 2 +- net/wireless/Kconfig | 3 +- net/wireless/core.c | 97 +++++++++++++++++++++++++++++++++++++++++++-- net/wireless/core.h | 7 ++++ net/wireless/wext-compat.c | 11 +++-- 11 files changed, 172 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h index e8852c092fe..28cc03bf19e 100644 --- a/include/asm-generic/errno.h +++ b/include/asm-generic/errno.h @@ -106,4 +106,6 @@ #define EOWNERDEAD 130 /* Owner died */ #define ENOTRECOVERABLE 131 /* State not recoverable */ +#define ERFKILL 132 /* Operation not possible due to RF-kill */ + #endif diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8b8e4b89362..1a21895b732 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -757,13 +757,11 @@ enum wiphy_params_flags { * @TX_POWER_AUTOMATIC: the dbm parameter is ignored * @TX_POWER_LIMITED: limit TX power by the dbm parameter * @TX_POWER_FIXED: fix TX power to the dbm parameter - * @TX_POWER_OFF: turn off completely (will go away) */ enum tx_power_setting { TX_POWER_AUTOMATIC, TX_POWER_LIMITED, TX_POWER_FIXED, - TX_POWER_OFF, }; /** @@ -855,8 +853,10 @@ enum tx_power_setting { * * @set_tx_power: set the transmit power according to the parameters * @get_tx_power: store the current TX power into the dbm variable; - * return 0 if successful; or -ENETDOWN if successful but power - * is disabled (this will go away) + * return 0 if successful + * + * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting + * functions to adjust rfkill hw state */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -952,6 +952,8 @@ struct cfg80211_ops { int (*set_tx_power)(struct wiphy *wiphy, enum tx_power_setting type, int dbm); int (*get_tx_power)(struct wiphy *wiphy, int *dbm); + + void (*rfkill_poll)(struct wiphy *wiphy); }; /* @@ -1666,4 +1668,23 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state + * @wiphy: the wiphy + * @blocked: block status + */ +void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked); + +/** + * wiphy_rfkill_start_polling - start polling rfkill + * @wiphy: the wiphy + */ +void wiphy_rfkill_start_polling(struct wiphy *wiphy); + +/** + * wiphy_rfkill_stop_polling - stop polling rfkill + * @wiphy: the wiphy + */ +void wiphy_rfkill_stop_polling(struct wiphy *wiphy); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0270aa6e08f..17d61d19d91 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -526,7 +526,7 @@ enum ieee80211_conf_flags { /** * enum ieee80211_conf_changed - denotes which configuration changed * - * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed + * @_IEEE80211_CONF_CHANGE_RADIO_ENABLED: DEPRECATED * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed @@ -536,7 +536,7 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed */ enum ieee80211_conf_changed { - IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), + _IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -546,6 +546,14 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; +static inline __deprecated enum ieee80211_conf_changed +__IEEE80211_CONF_CHANGE_RADIO_ENABLED(void) +{ + return _IEEE80211_CONF_CHANGE_RADIO_ENABLED; +} +#define IEEE80211_CONF_CHANGE_RADIO_ENABLED \ + __IEEE80211_CONF_CHANGE_RADIO_ENABLED() + /** * struct ieee80211_conf - configuration of the device * @@ -585,7 +593,7 @@ struct ieee80211_conf { int max_sleep_period; u16 listen_interval; - bool radio_enabled; + bool __deprecated radio_enabled; u8 long_frame_max_tx_count, short_frame_max_tx_count; @@ -1396,6 +1404,10 @@ enum ieee80211_ampdu_mlme_action { * is the first frame we expect to perform the action on. Notice * that TX/RX_STOP can pass NULL for this parameter. * Returns a negative error code on failure. + * + * @rfkill_poll: Poll rfkill hardware state. If you need this, you also + * need to set wiphy->rfkill_poll to %true before registration, + * and need to call wiphy_rfkill_set_hw_state() in the callback. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1444,6 +1456,8 @@ struct ieee80211_ops { int (*ampdu_action)(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn); + + void (*rfkill_poll)(struct ieee80211_hw *hw); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 81258acf48b..a9211cc183c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1340,7 +1340,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_channel *chan = local->hw.conf.channel; u32 changes = 0; - bool radio_enabled = true; switch (type) { case TX_POWER_AUTOMATIC: @@ -1359,14 +1358,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, return -EINVAL; local->user_power_level = dbm; break; - case TX_POWER_OFF: - radio_enabled = false; - break; - } - - if (radio_enabled != local->hw.conf.radio_enabled) { - changes |= IEEE80211_CONF_CHANGE_RADIO_ENABLED; - local->hw.conf.radio_enabled = radio_enabled; } ieee80211_hw_config(local, changes); @@ -1380,12 +1371,16 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) *dbm = local->hw.conf.power_level; - if (!local->hw.conf.radio_enabled) - return -ENETDOWN; - return 0; } +static void ieee80211_rfkill_poll(struct wiphy *wiphy) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + drv_rfkill_poll(local); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1427,4 +1422,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, + .rfkill_poll = ieee80211_rfkill_poll, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3912b5334b9..b13446afd48 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -181,4 +181,11 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, sta, tid, ssn); return -EOPNOTSUPP; } + + +static inline void drv_rfkill_poll(struct ieee80211_local *local) +{ + if (local->ops->rfkill_poll) + local->ops->rfkill_poll(&local->hw); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8c9f1c722cd..b7c8a448429 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -170,7 +170,7 @@ static int ieee80211_open(struct net_device *dev) goto err_del_bss; /* we're brought up, everything changes */ hw_reconf_flags = ~0; - ieee80211_led_radio(local, local->hw.conf.radio_enabled); + ieee80211_led_radio(local, true); } /* @@ -560,7 +560,7 @@ static int ieee80211_stop(struct net_device *dev) drv_stop(local); - ieee80211_led_radio(local, 0); + ieee80211_led_radio(local, false); flush_workqueue(local->hw.workqueue); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 31284c984e3..22f63815fb3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -973,7 +973,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (local->open_count) { res = drv_start(local); - ieee80211_led_radio(local, hw->conf.radio_enabled); + ieee80211_led_radio(local, true); } /* add interfaces */ diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 45005497c63..4428dd5e911 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,5 +1,6 @@ config CFG80211 - tristate "Improved wireless configuration API" + tristate "Improved wireless configuration API" + depends on RFKILL || !RFKILL config CFG80211_REG_DEBUG bool "cfg80211 regulatory debugging" diff --git a/net/wireless/core.c b/net/wireless/core.c index a5dbea1da47..3b74b88e10a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "nl80211.h" @@ -227,6 +228,41 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, return 0; } +static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) +{ + struct cfg80211_registered_device *drv = data; + + drv->ops->rfkill_poll(&drv->wiphy); +} + +static int cfg80211_rfkill_set_block(void *data, bool blocked) +{ + struct cfg80211_registered_device *drv = data; + struct wireless_dev *wdev; + + if (!blocked) + return 0; + + rtnl_lock(); + mutex_lock(&drv->devlist_mtx); + + list_for_each_entry(wdev, &drv->netdev_list, list) + dev_close(wdev->netdev); + + mutex_unlock(&drv->devlist_mtx); + rtnl_unlock(); + + return 0; +} + +static void cfg80211_rfkill_sync_work(struct work_struct *work) +{ + struct cfg80211_registered_device *drv; + + drv = container_of(work, struct cfg80211_registered_device, rfkill_sync); + cfg80211_rfkill_set_block(drv, rfkill_blocked(drv->rfkill)); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -274,6 +310,18 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) drv->wiphy.dev.class = &ieee80211_class; drv->wiphy.dev.platform_data = drv; + drv->rfkill_ops.set_block = cfg80211_rfkill_set_block; + drv->rfkill = rfkill_alloc(dev_name(&drv->wiphy.dev), + &drv->wiphy.dev, RFKILL_TYPE_WLAN, + &drv->rfkill_ops, drv); + + if (!drv->rfkill) { + kfree(drv); + return NULL; + } + + INIT_WORK(&drv->rfkill_sync, cfg80211_rfkill_sync_work); + /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. * Fragmentation and RTS threshold are disabled by default with the @@ -356,6 +404,10 @@ int wiphy_register(struct wiphy *wiphy) if (res) goto out_unlock; + res = rfkill_register(drv->rfkill); + if (res) + goto out_rm_dev; + list_add(&drv->list, &cfg80211_drv_list); /* add to debugfs */ @@ -379,16 +431,41 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_drv_add(drv); res = 0; -out_unlock: + goto out_unlock; + + out_rm_dev: + device_del(&drv->wiphy.dev); + out_unlock: mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); +void wiphy_rfkill_start_polling(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); + + if (!drv->ops->rfkill_poll) + return; + drv->rfkill_ops.poll = cfg80211_rfkill_poll; + rfkill_resume_polling(drv->rfkill); +} +EXPORT_SYMBOL(wiphy_rfkill_start_polling); + +void wiphy_rfkill_stop_polling(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); + + rfkill_pause_polling(drv->rfkill); +} +EXPORT_SYMBOL(wiphy_rfkill_stop_polling); + void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); + rfkill_unregister(drv->rfkill); + /* protect the device list */ mutex_lock(&cfg80211_mutex); @@ -425,6 +502,7 @@ EXPORT_SYMBOL(wiphy_unregister); void cfg80211_dev_free(struct cfg80211_registered_device *drv) { struct cfg80211_internal_bss *scan, *tmp; + rfkill_destroy(drv->rfkill); mutex_destroy(&drv->mtx); mutex_destroy(&drv->devlist_mtx); list_for_each_entry_safe(scan, tmp, &drv->bss_list, list) @@ -438,6 +516,15 @@ void wiphy_free(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_free); +void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); + + if (rfkill_set_hw_state(drv->rfkill, blocked)) + schedule_work(&drv->rfkill_sync); +} +EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); + static int cfg80211_netdev_notifier_call(struct notifier_block * nb, unsigned long state, void *ndev) @@ -446,7 +533,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, struct cfg80211_registered_device *rdev; if (!dev->ieee80211_ptr) - return 0; + return NOTIFY_DONE; rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); @@ -492,9 +579,13 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } mutex_unlock(&rdev->devlist_mtx); break; + case NETDEV_PRE_UP: + if (rfkill_blocked(rdev->rfkill)) + return notifier_from_errno(-ERFKILL); + break; } - return 0; + return NOTIFY_DONE; } static struct notifier_block cfg80211_netdev_notifier = { diff --git a/net/wireless/core.h b/net/wireless/core.h index ab512bcd815..bfa340c7abb 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include "reg.h" @@ -24,6 +26,11 @@ struct cfg80211_registered_device { * any call is in progress */ struct mutex mtx; + /* rfkill support */ + struct rfkill_ops rfkill_ops; + struct rfkill *rfkill; + struct work_struct rfkill_sync; + /* ISO / IEC 3166 alpha2 for which this device is receiving * country IEs on, this can help disregard country IEs from APs * on the same alpha2 quickly. The alpha2 may differ from diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9fbfb8536e7..d030c531567 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -764,6 +764,8 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, /* only change when not disabling */ if (!data->txpower.disabled) { + rfkill_set_sw_state(rdev->rfkill, false); + if (data->txpower.fixed) { /* * wext doesn't support negative values, see @@ -787,7 +789,9 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, } } } else { - type = TX_POWER_OFF; + rfkill_set_sw_state(rdev->rfkill, true); + schedule_work(&rdev->rfkill_sync); + return 0; } return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);; @@ -811,13 +815,12 @@ int cfg80211_wext_giwtxpower(struct net_device *dev, return -EOPNOTSUPP; err = rdev->ops->get_tx_power(wdev->wiphy, &val); - /* HACK!!! */ - if (err && err != -ENETDOWN) + if (err) return err; /* well... oh well */ data->txpower.fixed = 1; - data->txpower.disabled = err == -ENETDOWN; + data->txpower.disabled = rfkill_blocked(rdev->rfkill); data->txpower.value = val; data->txpower.flags = IW_TXPOW_DBM; -- cgit v1.2.3-70-g09d2 From e0a94c2a63f2644826069044649669b5e7ca75d3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 3 Jun 2009 16:04:31 -0400 Subject: security: use mmap_min_addr indepedently of security models This patch removes the dependency of mmap_min_addr on CONFIG_SECURITY. It also sets a default mmap_min_addr of 4096. mmapping of addresses below 4096 will only be possible for processes with CAP_SYS_RAWIO. Signed-off-by: Christoph Lameter Acked-by: Eric Paris Looks-ok-by: Linus Torvalds Signed-off-by: James Morris --- include/linux/mm.h | 2 -- include/linux/security.h | 2 ++ kernel/sysctl.c | 2 -- mm/Kconfig | 19 +++++++++++++++++++ mm/mmap.c | 3 +++ security/Kconfig | 22 +--------------------- security/security.c | 3 --- 7 files changed, 25 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c..0c21af6abff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -580,12 +580,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline unsigned long round_hint_to_min(unsigned long hint) { -#ifdef CONFIG_SECURITY hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); -#endif return hint; } diff --git a/include/linux/security.h b/include/linux/security.h index d5fd6163606..5eff459b383 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { + if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) + return -EACCES; return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 149581fb48a..45bd711a242 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1237,7 +1237,6 @@ static struct ctl_table vm_table[] = { .strategy = &sysctl_jiffies, }, #endif -#ifdef CONFIG_SECURITY { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", @@ -1246,7 +1245,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_doulongvec_minmax, }, -#endif #ifdef CONFIG_NUMA { .ctl_name = CTL_UNNUMBERED, diff --git a/mm/Kconfig b/mm/Kconfig index c2b57d81e15..71830ba7b98 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -226,6 +226,25 @@ config HAVE_MLOCKED_PAGE_BIT config MMU_NOTIFIER bool +config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" + default 4096 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. + Programs which use vm86 functionality would either need additional + permissions from either the LSM or the capabilities module or have + this protection disabled. + + This value can be changed after boot using the + /proc/sys/vm/mmap_min_addr tunable. + + config NOMMU_INITIAL_TRIM_EXCESS int "Turn on mmap() excess space trimming before booting" depends on !MMU diff --git a/mm/mmap.c b/mm/mmap.c index 6b7b1a95944..2b43fa1aa3c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -87,6 +87,9 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; +/* amount of vm to protect from userspace access */ +unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; + /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/security/Kconfig b/security/Kconfig index bb244774e9d..d23c839038f 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -110,28 +110,8 @@ config SECURITY_ROOTPLUG See for more information about this module. - - If you are unsure how to answer this question, answer N. - -config SECURITY_DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on SECURITY - default 0 - help - This is the portion of low virtual memory which should be protected - from userspace allocation. Keeping a user from writing to low pages - can help reduce the impact of kernel NULL pointer bugs. - - For most ia64, ppc64 and x86 users with lots of address space - a value of 65536 is reasonable and should cause no problems. - On arm and other archs it should not be higher than 32768. - Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. - - This value can be changed after boot using the - /proc/sys/vm/mmap_min_addr tunable. + If you are unsure how to answer this question, answer N. source security/selinux/Kconfig source security/smack/Kconfig diff --git a/security/security.c b/security/security.c index 5284255c5cd..dc7674fbfc7 100644 --- a/security/security.c +++ b/security/security.c @@ -26,9 +26,6 @@ extern void security_fixup_ops(struct security_operations *ops); struct security_operations *security_ops; /* Initialized to NULL */ -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR; - static inline int verify(struct security_operations *ops) { /* verify the security_operations structure exists */ -- cgit v1.2.3-70-g09d2 From 278b2513f76161a9cf1ebddd620dc9d1714fe573 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 3 Jun 2009 21:20:51 -0700 Subject: gso: Stop fraglists from escaping As it stands skb fraglists can get past the check in dev_queue_xmit if the skb is marked as GSO. In particular, if the packet doesn't have the proper checksums for GSO, but can otherwise be handled by the underlying device, we will not perform the fraglist check on it at all. If the underlying device cannot handle fraglists, then this will break. The fix is as simple as moving the fraglist check from the device check into skb_gso_ok. This has caused crashes with Xen when used together with GRO which can generate GSO packets with fraglists. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bbfabf3012b..2a801380b50 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1903,15 +1903,14 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { - return net_gso_ok(features, skb_shinfo(skb)->gso_type); + return net_gso_ok(features, skb_shinfo(skb)->gso_type) && + (!skb_shinfo(skb)->frag_list || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || - (skb_shinfo(skb)->frag_list && - !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } -- cgit v1.2.3-70-g09d2 From a5e78820966e17c2316866e00047e4e7e5480f04 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 4 Jun 2009 16:54:42 +0200 Subject: netfilter: x_tables: added hook number into match extension parameter structure. Signed-off-by: Evgeniy Polyakov Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- net/bridge/netfilter/ebtables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c9efe039dc5..1030b759389 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -184,9 +184,10 @@ struct xt_counters_info * @matchinfo: per-match data * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hotdrop: drop packet if we had inspection problems + * @hook: hook number given packet came from * @family: Actual NFPROTO_* through which the function is invoked * (helpful when match->family == NFPROTO_UNSPEC) + * @hotdrop: drop packet if we had inspection problems */ struct xt_match_param { const struct net_device *in, *out; @@ -194,8 +195,9 @@ struct xt_match_param { const void *matchinfo; int fragoff; unsigned int thoff; - bool *hotdrop; + unsigned int hooknum; u_int8_t family; + bool *hotdrop; }; /** diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 24555834d43..37928d5f284 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -170,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.hotdrop = &hotdrop; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7b35c0b3841..5bf7c3f185d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -343,7 +343,7 @@ ipt_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV4; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5164e0bf3bc..ced1f2c0cb6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV6; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); -- cgit v1.2.3-70-g09d2 From 60313ebed739b331e8e61079da27a11ee3b73a30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2009 16:53:44 +0200 Subject: perf_counter: Add fork event Create a fork event so that we can easily clone the comm and dso maps without having to generate all those events. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++ kernel/fork.c | 4 +- kernel/perf_counter.c | 131 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 126 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 37d5541d74c..380247bdb91 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -276,6 +276,14 @@ enum perf_event_type { PERF_EVENT_THROTTLE = 5, PERF_EVENT_UNTHROTTLE = 6, + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * }; + */ + PERF_EVENT_FORK = 7, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -618,6 +626,7 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); @@ -673,6 +682,7 @@ perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } static inline void perf_counter_task_migration(struct task_struct *task, int cpu) { } diff --git a/kernel/fork.c b/kernel/fork.c index b7d7a9f0bd7..f4466ca37ec 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1412,12 +1412,12 @@ long do_fork(unsigned long clone_flags, if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); - } else { + } else if (!(clone_flags & CLONE_VM)) { /* * vfork will do an exec which will call * set_task_comm() */ - perf_counter_comm(p); + perf_counter_fork(p); } audit_finish_fork(p); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0bb03f15a5b..78c58623a0d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -40,9 +40,9 @@ static int perf_reserved_percpu __read_mostly; static int perf_overcommit __read_mostly = 1; static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_tracking __read_mostly; -static atomic_t nr_munmap_tracking __read_mostly; -static atomic_t nr_comm_tracking __read_mostly; +static atomic_t nr_mmap_counters __read_mostly; +static atomic_t nr_munmap_counters __read_mostly; +static atomic_t nr_comm_counters __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ @@ -1447,11 +1447,11 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_counters); if (counter->attr.mmap) - atomic_dec(&nr_mmap_tracking); + atomic_dec(&nr_mmap_counters); if (counter->attr.munmap) - atomic_dec(&nr_munmap_tracking); + atomic_dec(&nr_munmap_counters); if (counter->attr.comm) - atomic_dec(&nr_comm_tracking); + atomic_dec(&nr_comm_counters); if (counter->destroy) counter->destroy(counter); @@ -2475,6 +2475,105 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_end(&handle); } +/* + * fork tracking + */ + +struct perf_fork_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 ppid; + } event; +}; + +static void perf_counter_fork_output(struct perf_counter *counter, + struct perf_fork_event *fork_event) +{ + struct perf_output_handle handle; + int size = fork_event->event.header.size; + struct task_struct *task = fork_event->task; + int ret = perf_output_begin(&handle, counter, size, 0, 0); + + if (ret) + return; + + fork_event->event.pid = perf_counter_pid(counter, task); + fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); + + perf_output_put(&handle, fork_event->event); + perf_output_end(&handle); +} + +static int perf_counter_fork_match(struct perf_counter *counter) +{ + if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap) + return 1; + + return 0; +} + +static void perf_counter_fork_ctx(struct perf_counter_context *ctx, + struct perf_fork_event *fork_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_fork_match(counter)) + perf_counter_fork_output(counter, fork_event); + } + rcu_read_unlock(); +} + +static void perf_counter_fork_event(struct perf_fork_event *fork_event) +{ + struct perf_cpu_context *cpuctx; + struct perf_counter_context *ctx; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_fork_ctx(&cpuctx->ctx, fork_event); + put_cpu_var(perf_cpu_context); + + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. + */ + ctx = rcu_dereference(current->perf_counter_ctxp); + if (ctx) + perf_counter_fork_ctx(ctx, fork_event); + rcu_read_unlock(); +} + +void perf_counter_fork(struct task_struct *task) +{ + struct perf_fork_event fork_event; + + if (!atomic_read(&nr_comm_counters) && + !atomic_read(&nr_mmap_counters) && + !atomic_read(&nr_munmap_counters)) + return; + + fork_event = (struct perf_fork_event){ + .task = task, + .event = { + .header = { + .type = PERF_EVENT_FORK, + .size = sizeof(fork_event.event), + }, + }, + }; + + perf_counter_fork_event(&fork_event); +} + /* * comm tracking */ @@ -2511,11 +2610,9 @@ static void perf_counter_comm_output(struct perf_counter *counter, perf_output_end(&handle); } -static int perf_counter_comm_match(struct perf_counter *counter, - struct perf_comm_event *comm_event) +static int perf_counter_comm_match(struct perf_counter *counter) { - if (counter->attr.comm && - comm_event->event.header.type == PERF_EVENT_COMM) + if (counter->attr.comm) return 1; return 0; @@ -2531,7 +2628,7 @@ static void perf_counter_comm_ctx(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_comm_match(counter, comm_event)) + if (perf_counter_comm_match(counter)) perf_counter_comm_output(counter, comm_event); } rcu_read_unlock(); @@ -2570,7 +2667,7 @@ void perf_counter_comm(struct task_struct *task) { struct perf_comm_event comm_event; - if (!atomic_read(&nr_comm_tracking)) + if (!atomic_read(&nr_comm_counters)) return; comm_event = (struct perf_comm_event){ @@ -2708,7 +2805,7 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, { struct perf_mmap_event mmap_event; - if (!atomic_read(&nr_mmap_tracking)) + if (!atomic_read(&nr_mmap_counters)) return; mmap_event = (struct perf_mmap_event){ @@ -2729,7 +2826,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, { struct perf_mmap_event mmap_event; - if (!atomic_read(&nr_munmap_tracking)) + if (!atomic_read(&nr_munmap_counters)) return; mmap_event = (struct perf_mmap_event){ @@ -3427,11 +3524,11 @@ done: atomic_inc(&nr_counters); if (counter->attr.mmap) - atomic_inc(&nr_mmap_tracking); + atomic_inc(&nr_mmap_counters); if (counter->attr.munmap) - atomic_inc(&nr_munmap_tracking); + atomic_inc(&nr_munmap_counters); if (counter->attr.comm) - atomic_inc(&nr_comm_tracking); + atomic_inc(&nr_comm_counters); return counter; } -- cgit v1.2.3-70-g09d2 From d99e9446200c1ffab28cb0e39b76c34a2bfafd06 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2009 17:08:58 +0200 Subject: perf_counter: Remove munmap stuff In name of keeping it simple, only track mmap events. Userspace will have to remove old overlapping maps when it encounters them. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 +---------- kernel/perf_counter.c | 38 +++----------------------------------- mm/mmap.c | 6 ------ 3 files changed, 4 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 380247bdb91..6ca403acd41 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -148,11 +148,10 @@ struct perf_counter_attr { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ mmap : 1, /* include mmap data */ - munmap : 1, /* include munmap data */ comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 52; + __reserved_1 : 53; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -246,7 +245,6 @@ enum perf_event_type { * }; */ PERF_EVENT_MMAP = 1, - PERF_EVENT_MUNMAP = 2, /* * struct { @@ -622,9 +620,6 @@ extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); extern void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -extern void perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file); - extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); @@ -677,10 +672,6 @@ static inline void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } -static inline void -perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } - static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 78c58623a0d..195712e20d0 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -41,7 +41,6 @@ static int perf_overcommit __read_mostly = 1; static atomic_t nr_counters __read_mostly; static atomic_t nr_mmap_counters __read_mostly; -static atomic_t nr_munmap_counters __read_mostly; static atomic_t nr_comm_counters __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ @@ -1448,8 +1447,6 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_counters); if (counter->attr.mmap) atomic_dec(&nr_mmap_counters); - if (counter->attr.munmap) - atomic_dec(&nr_munmap_counters); if (counter->attr.comm) atomic_dec(&nr_comm_counters); @@ -2510,7 +2507,7 @@ static void perf_counter_fork_output(struct perf_counter *counter, static int perf_counter_fork_match(struct perf_counter *counter) { - if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap) + if (counter->attr.comm || counter->attr.mmap) return 1; return 0; @@ -2557,8 +2554,7 @@ void perf_counter_fork(struct task_struct *task) struct perf_fork_event fork_event; if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters) && - !atomic_read(&nr_munmap_counters)) + !atomic_read(&nr_mmap_counters)) return; fork_event = (struct perf_fork_event){ @@ -2722,12 +2718,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter, static int perf_counter_mmap_match(struct perf_counter *counter, struct perf_mmap_event *mmap_event) { - if (counter->attr.mmap && - mmap_event->event.header.type == PERF_EVENT_MMAP) - return 1; - - if (counter->attr.munmap && - mmap_event->event.header.type == PERF_EVENT_MUNMAP) + if (counter->attr.mmap) return 1; return 0; @@ -2821,27 +2812,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, perf_counter_mmap_event(&mmap_event); } -void perf_counter_munmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) -{ - struct perf_mmap_event mmap_event; - - if (!atomic_read(&nr_munmap_counters)) - return; - - mmap_event = (struct perf_mmap_event){ - .file = file, - .event = { - .header = { .type = PERF_EVENT_MUNMAP, }, - .start = addr, - .len = len, - .pgoff = pgoff, - }, - }; - - perf_counter_mmap_event(&mmap_event); -} - /* * Log sample_period changes so that analyzing tools can re-normalize the * event flow. @@ -3525,8 +3495,6 @@ done: atomic_inc(&nr_counters); if (counter->attr.mmap) atomic_inc(&nr_mmap_counters); - if (counter->attr.munmap) - atomic_inc(&nr_munmap_counters); if (counter->attr.comm) atomic_inc(&nr_comm_counters); diff --git a/mm/mmap.c b/mm/mmap.c index 2c1c2cb0e2e..6451ce2854b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1756,12 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) do { long nrpages = vma_pages(vma); - if (vma->vm_flags & VM_EXEC) { - perf_counter_munmap(vma->vm_start, - nrpages << PAGE_SHIFT, - vma->vm_pgoff, vma->vm_file); - } - mm->total_vm -= nrpages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); vma = remove_vma(vma); -- cgit v1.2.3-70-g09d2 From 5926a295bb78272b3f648f62febecd19a1b6a6ca Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Thu, 4 Jun 2009 17:43:04 +0100 Subject: [ARM] 5541/1: serial/amba-pl011.c: add support for the modified port found in Nomadik The Nomadik 8815 SoC has a slightly modified version of the PL011 block. The patch uses the different ID value as a key to select a vendor structure that is used to keep track of the differences, as suggested by Russell King. Signed-off-by: Alessandro Rubini Acked-by: Andrea Gallo Acked-by: Linus Walleij Signed-off-by: Russell King --- drivers/serial/amba-pl011.c | 30 +++++++++++++++++++++++++++--- include/linux/amba/serial.h | 3 +++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 4cfa1eb2689..8c5bda27736 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -70,6 +70,23 @@ struct uart_amba_port { struct clk *clk; unsigned int im; /* interrupt mask */ unsigned int old_status; + unsigned int ifls; /* vendor-specific */ +}; + +/* There is by now at least one vendor with differing details, so handle it */ +struct vendor_data { + unsigned int ifls; + unsigned int fifosize; +}; + +static struct vendor_data vendor_arm = { + .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, + .fifosize = 16, +}; + +static struct vendor_data vendor_st = { + .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, + .fifosize = 64, }; static void pl011_stop_tx(struct uart_port *port) @@ -360,8 +377,7 @@ static int pl011_startup(struct uart_port *port) if (retval) goto clk_dis; - writew(UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, - uap->port.membase + UART011_IFLS); + writew(uap->ifls, uap->port.membase + UART011_IFLS); /* * Provoke TX FIFO interrupt into asserting. @@ -732,6 +748,7 @@ static struct uart_driver amba_reg = { static int pl011_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; + struct vendor_data *vendor = id->data; void __iomem *base; int i, ret; @@ -762,12 +779,13 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id) goto unmap; } + uap->ifls = vendor->ifls; uap->port.dev = &dev->dev; uap->port.mapbase = dev->res.start; uap->port.membase = base; uap->port.iotype = UPIO_MEM; uap->port.irq = dev->irq[0]; - uap->port.fifosize = 16; + uap->port.fifosize = vendor->fifosize; uap->port.ops = &amba_pl011_pops; uap->port.flags = UPF_BOOT_AUTOCONF; uap->port.line = i; @@ -812,6 +830,12 @@ static struct amba_id pl011_ids[] __initdata = { { .id = 0x00041011, .mask = 0x000fffff, + .data = &vendor_arm, + }, + { + .id = 0x00380802, + .mask = 0x00ffffff, + .data = &vendor_st, }, { 0, 0 }, }; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac..42949e210cb 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -114,6 +114,9 @@ #define UART011_IFLS_TX4_8 (2 << 0) #define UART011_IFLS_TX6_8 (3 << 0) #define UART011_IFLS_TX7_8 (4 << 0) +/* special values for ST vendor with deeper fifo */ +#define UART011_IFLS_RX_HALF (5 << 3) +#define UART011_IFLS_TX_HALF (5 << 0) #define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ #define UART011_BEIM (1 << 9) /* break error interrupt mask */ -- cgit v1.2.3-70-g09d2 From c0683039207226afcffbe0fbf6a1caaee77a37b0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 3 Jun 2009 17:43:14 +0100 Subject: [ARM] 5536/1: Move clk_add_alias() to arch/arm/common/clkdev.c This can be used for other arm platforms too as discussed on the linux-arm-kernel list. Also check the return value with IS_ERR and return PTR_ERR as suggested by Russell King. Signed-off-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/common/clkdev.c | 18 ++++++++++++++++++ arch/arm/mach-pxa/clock.c | 17 ----------------- include/linux/clk.h | 13 +++++++++++++ 3 files changed, 31 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c index 5589444ff43..f37afd9422f 100644 --- a/arch/arm/common/clkdev.c +++ b/arch/arm/common/clkdev.c @@ -135,6 +135,24 @@ struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, } EXPORT_SYMBOL(clkdev_alloc); +int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, + struct device *dev) +{ + struct clk *r = clk_get(dev, id); + struct clk_lookup *l; + + if (IS_ERR(r)) + return PTR_ERR(r); + + l = clkdev_alloc(r, alias, alias_dev_name); + clk_put(r); + if (!l) + return -ENODEV; + clkdev_add(l); + return 0; +} +EXPORT_SYMBOL(clk_add_alias); + /* * clkdev_drop - remove a clock dynamically allocated */ diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c index db52d2c4791..49ae3829231 100644 --- a/arch/arm/mach-pxa/clock.c +++ b/arch/arm/mach-pxa/clock.c @@ -86,20 +86,3 @@ void clks_register(struct clk_lookup *clks, size_t num) for (i = 0; i < num; i++) clkdev_add(&clks[i]); } - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (!r) - return -ENODEV; - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} diff --git a/include/linux/clk.h b/include/linux/clk.h index 1db9bbf444a..1d37f42ac29 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -142,4 +142,17 @@ struct clk *clk_get_parent(struct clk *clk); */ struct clk *clk_get_sys(const char *dev_id, const char *con_id); +/** + * clk_add_alias - add a new clock alias + * @alias: name for clock alias + * @alias_dev_name: device name + * @id: platform specific clock name + * @dev: device + * + * Allows using generic clock names for drivers by adding a new alias. + * Assumes clkdev, see clkdev.h for more info. + */ +int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, + struct device *dev); + #endif -- cgit v1.2.3-70-g09d2 From 10662aa3083f869c645cc2abf5d66849001e2f5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2009 13:24:24 +0200 Subject: netfilter: xt_NFQUEUE: queue balancing support Adds support for specifying a range of queues instead of a single queue id. Flows will be distributed across the given range. This is useful for multicore systems: Instead of having a single application read packets from a queue, start multiple instances on queues x, x+1, .. x+n. Each instance can process flows independently. Packets for the same connection are put into the same queue. Signed-off-by: Holger Eitzenberger Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_NFQUEUE.h | 5 ++ net/netfilter/xt_NFQUEUE.c | 93 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h index 982a89f7827..2584f4a777d 100644 --- a/include/linux/netfilter/xt_NFQUEUE.h +++ b/include/linux/netfilter/xt_NFQUEUE.h @@ -15,4 +15,9 @@ struct xt_NFQ_info { __u16 queuenum; }; +struct xt_NFQ_info_v1 { + __u16 queuenum; + __u16 queues_total; +}; + #endif /* _XT_NFQ_TARGET_H */ diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 6e0f84d4d05..498b45101df 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,6 +11,10 @@ #include #include +#include +#include +#include + #include #include #include @@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); +static u32 jhash_initval __read_mostly; + static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -31,6 +37,72 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_QUEUE_NR(tinfo->queuenum); } +static u32 hash_v4(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + u32 ipaddr; + + /* packets in either direction go into same queue */ + ipaddr = iph->saddr ^ iph->daddr; + + return jhash_2words(ipaddr, iph->protocol, jhash_initval); +} + +static unsigned int +nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v4(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 hash_v6(const struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 addr[4]; + + addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; + addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; + addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; + addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; + + return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); +} + +static unsigned int +nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v6(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} +#endif + +static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 maxid; + + if (info->queues_total == 0) { + pr_err("NFQUEUE: number of total queues is 0\n"); + return false; + } + maxid = info->queues_total - 1 + info->queuenum; + if (maxid > 0xffff) { + pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); + return false; + } + return true; +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -39,10 +111,31 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 1, + .family = NFPROTO_IPV4, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg4_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), + .me = THIS_MODULE, + }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + { + .name = "NFQUEUE", + .revision = 1, + .family = NFPROTO_IPV6, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg6_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), + .me = THIS_MODULE, + }, +#endif }; static int __init nfqueue_tg_init(void) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } -- cgit v1.2.3-70-g09d2 From 089dd79db9264dc0da602bad45d42f1b3e7d1e07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 14:04:55 +0200 Subject: perf_counter: Generate mmap events for install_special_mapping() In order to track the vdso also generate mmap events for install_special_mapping(). Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 14 ++++++++------ kernel/perf_counter.c | 34 ++++++++++++++++++++++------------ mm/mmap.c | 5 +++-- 3 files changed, 33 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6ca403acd41..40dc0e273d9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -617,8 +617,13 @@ static inline int is_software_counter(struct perf_counter *counter) extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); -extern void perf_counter_mmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file); +extern void __perf_counter_mmap(struct vm_area_struct *vma); + +static inline void perf_counter_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_counter_mmap(vma); +} extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); @@ -668,10 +673,7 @@ static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } -static inline void -perf_counter_mmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) { } - +static inline void perf_counter_mmap(struct vm_area_struct *vma) { } static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a5d3e2aedd2..37a5a241ca7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2255,7 +2255,7 @@ out: } static void perf_output_copy(struct perf_output_handle *handle, - void *buf, unsigned int len) + const void *buf, unsigned int len) { unsigned int pages_mask; unsigned int offset; @@ -2681,9 +2681,10 @@ void perf_counter_comm(struct task_struct *task) */ struct perf_mmap_event { - struct file *file; - char *file_name; - int file_size; + struct vm_area_struct *vma; + + const char *file_name; + int file_size; struct { struct perf_event_header header; @@ -2744,11 +2745,12 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; - struct file *file = mmap_event->file; + struct vm_area_struct *vma = mmap_event->vma; + struct file *file = vma->vm_file; unsigned int size; char tmp[16]; char *buf = NULL; - char *name; + const char *name; if (file) { buf = kzalloc(PATH_MAX, GFP_KERNEL); @@ -2762,6 +2764,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) goto got_name; } } else { + name = arch_vma_name(mmap_event->vma); + if (name) + goto got_name; + + if (!vma->vm_mm) { + name = strncpy(tmp, "[vdso]", sizeof(tmp)); + goto got_name; + } + name = strncpy(tmp, "//anon", sizeof(tmp)); goto got_name; } @@ -2791,8 +2802,7 @@ got_name: kfree(buf); } -void perf_counter_mmap(unsigned long addr, unsigned long len, - unsigned long pgoff, struct file *file) +void __perf_counter_mmap(struct vm_area_struct *vma) { struct perf_mmap_event mmap_event; @@ -2800,12 +2810,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, return; mmap_event = (struct perf_mmap_event){ - .file = file, + .vma = vma, .event = { .header = { .type = PERF_EVENT_MMAP, }, - .start = addr, - .len = len, - .pgoff = pgoff, + .start = vma->vm_start, + .len = vma->vm_end - vma->vm_start, + .pgoff = vma->vm_pgoff, }, }; diff --git a/mm/mmap.c b/mm/mmap.c index 6451ce2854b..8101de490c7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1220,8 +1220,7 @@ munmap_back: if (correct_wcount) atomic_inc(&inode->i_writecount); out: - if (vm_flags & VM_EXEC) - perf_counter_mmap(addr, len, pgoff, file); + perf_counter_mmap(vma); mm->total_vm += len >> PAGE_SHIFT; vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); @@ -2309,6 +2308,8 @@ int install_special_mapping(struct mm_struct *mm, mm->total_vm += len >> PAGE_SHIFT; + perf_counter_mmap(vma); + return 0; } -- cgit v1.2.3-70-g09d2 From 77c9a5daa9c4d9b37812c9c69c7bcbb3f9399c3c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 5 Jun 2009 16:26:18 +0200 Subject: firewire: reorganize header files The three header files of firewire-core, i.e. "drivers/firewire/fw-device.h", "drivers/firewire/fw-topology.h", "drivers/firewire/fw-transaction.h", are replaced by "drivers/firewire/core.h", "include/linux/firewire.h". The latter includes everything which a firewire high-level driver (like firewire-sbp2) needs besides linux/firewire-constants.h, while core.h contains the rest which is needed by firewire-core itself and by low- level drivers (card drivers) like firewire-ohci. High-level drivers can now also reside outside of drivers/firewire without having to add drivers/firewire to the header file search path in makefiles. At least the firedtv driver will be such a driver. I also considered to spread the contents of core.h over several files, one for each .c file where the respective implementation resides. But it turned out that most core .c files will end up including most of the core .h files. Also, the combined core.h isn't unreasonably big, and it will lose more of its contents to linux/firewire.h anyway soon when more firewire drivers are added. (IP-over-1394, firedtv, and there are plans for one or two more.) Furthermore, fw-ohci.h is renamed to ohci.h. The name of core.h and ohci.h is chosen with regard to name changes of the .c files in a follow-up change. Signed-off-by: Stefan Richter --- drivers/firewire/core.h | 293 +++++++++++++++++++++++++ drivers/firewire/fw-card.c | 6 +- drivers/firewire/fw-cdev.c | 5 +- drivers/firewire/fw-device.c | 9 +- drivers/firewire/fw-device.h | 190 ---------------- drivers/firewire/fw-iso.c | 4 +- drivers/firewire/fw-ohci.c | 5 +- drivers/firewire/fw-ohci.h | 157 -------------- drivers/firewire/fw-sbp2.c | 4 +- drivers/firewire/fw-topology.c | 10 +- drivers/firewire/fw-topology.h | 77 ------- drivers/firewire/fw-transaction.c | 8 +- drivers/firewire/fw-transaction.h | 446 -------------------------------------- drivers/firewire/ohci.h | 157 ++++++++++++++ include/linux/firewire.h | 350 ++++++++++++++++++++++++++++++ 15 files changed, 830 insertions(+), 891 deletions(-) create mode 100644 drivers/firewire/core.h delete mode 100644 drivers/firewire/fw-device.h delete mode 100644 drivers/firewire/fw-ohci.h delete mode 100644 drivers/firewire/fw-topology.h delete mode 100644 drivers/firewire/fw-transaction.h create mode 100644 drivers/firewire/ohci.h create mode 100644 include/linux/firewire.h (limited to 'include') diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h new file mode 100644 index 00000000000..273f0ab8292 --- /dev/null +++ b/drivers/firewire/core.h @@ -0,0 +1,293 @@ +#ifndef _FIREWIRE_CORE_H +#define _FIREWIRE_CORE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct device; +struct fw_card; +struct fw_device; +struct fw_iso_buffer; +struct fw_iso_context; +struct fw_iso_packet; +struct fw_node; +struct fw_packet; + + +/* -card */ + +/* bitfields within the PHY registers */ +#define PHY_LINK_ACTIVE 0x80 +#define PHY_CONTENDER 0x40 +#define PHY_BUS_RESET 0x40 +#define PHY_BUS_SHORT_RESET 0x40 + +#define BANDWIDTH_AVAILABLE_INITIAL 4915 +#define BROADCAST_CHANNEL_INITIAL (1 << 31 | 31) +#define BROADCAST_CHANNEL_VALID (1 << 30) + +struct fw_card_driver { + /* + * Enable the given card with the given initial config rom. + * This function is expected to activate the card, and either + * enable the PHY or set the link_on bit and initiate a bus + * reset. + */ + int (*enable)(struct fw_card *card, u32 *config_rom, size_t length); + + int (*update_phy_reg)(struct fw_card *card, int address, + int clear_bits, int set_bits); + + /* + * Update the config rom for an enabled card. This function + * should change the config rom that is presented on the bus + * an initiate a bus reset. + */ + int (*set_config_rom)(struct fw_card *card, + u32 *config_rom, size_t length); + + void (*send_request)(struct fw_card *card, struct fw_packet *packet); + void (*send_response)(struct fw_card *card, struct fw_packet *packet); + /* Calling cancel is valid once a packet has been submitted. */ + int (*cancel_packet)(struct fw_card *card, struct fw_packet *packet); + + /* + * Allow the specified node ID to do direct DMA out and in of + * host memory. The card will disable this for all node when + * a bus reset happens, so driver need to reenable this after + * bus reset. Returns 0 on success, -ENODEV if the card + * doesn't support this, -ESTALE if the generation doesn't + * match. + */ + int (*enable_phys_dma)(struct fw_card *card, + int node_id, int generation); + + u64 (*get_bus_time)(struct fw_card *card); + + struct fw_iso_context * + (*allocate_iso_context)(struct fw_card *card, + int type, int channel, size_t header_size); + void (*free_iso_context)(struct fw_iso_context *ctx); + + int (*start_iso)(struct fw_iso_context *ctx, + s32 cycle, u32 sync, u32 tags); + + int (*queue_iso)(struct fw_iso_context *ctx, + struct fw_iso_packet *packet, + struct fw_iso_buffer *buffer, + unsigned long payload); + + int (*stop_iso)(struct fw_iso_context *ctx); +}; + +void fw_card_initialize(struct fw_card *card, + const struct fw_card_driver *driver, struct device *device); +int fw_card_add(struct fw_card *card, + u32 max_receive, u32 link_speed, u64 guid); +void fw_core_remove_card(struct fw_card *card); +int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset); +int fw_compute_block_crc(u32 *block); +void fw_schedule_bm_work(struct fw_card *card, unsigned long delay); + +struct fw_descriptor { + struct list_head link; + size_t length; + u32 immediate; + u32 key; + const u32 *data; +}; + +int fw_core_add_descriptor(struct fw_descriptor *desc); +void fw_core_remove_descriptor(struct fw_descriptor *desc); + + +/* -cdev */ + +extern const struct file_operations fw_device_ops; + +void fw_device_cdev_update(struct fw_device *device); +void fw_device_cdev_remove(struct fw_device *device); + + +/* -device */ + +extern struct rw_semaphore fw_device_rwsem; +extern struct idr fw_device_idr; +extern int fw_cdev_major; + +struct fw_device *fw_device_get_by_devt(dev_t devt); +void fw_device_set_broadcast_channel(struct fw_device *device, int generation); +void fw_node_event(struct fw_card *card, struct fw_node *node, int event); + + +/* -iso */ + +/* + * The iso packet format allows for an immediate header/payload part + * stored in 'header' immediately after the packet info plus an + * indirect payload part that is pointer to by the 'payload' field. + * Applications can use one or the other or both to implement simple + * low-bandwidth streaming (e.g. audio) or more advanced + * scatter-gather streaming (e.g. assembling video frame automatically). + */ +struct fw_iso_packet { + u16 payload_length; /* Length of indirect payload. */ + u32 interrupt:1; /* Generate interrupt on this packet */ + u32 skip:1; /* Set to not send packet at all. */ + u32 tag:2; + u32 sy:4; + u32 header_length:8; /* Length of immediate header. */ + u32 header[0]; +}; + +#define FW_ISO_CONTEXT_TRANSMIT 0 +#define FW_ISO_CONTEXT_RECEIVE 1 + +#define FW_ISO_CONTEXT_MATCH_TAG0 1 +#define FW_ISO_CONTEXT_MATCH_TAG1 2 +#define FW_ISO_CONTEXT_MATCH_TAG2 4 +#define FW_ISO_CONTEXT_MATCH_TAG3 8 +#define FW_ISO_CONTEXT_MATCH_ALL_TAGS 15 + +/* + * An iso buffer is just a set of pages mapped for DMA in the + * specified direction. Since the pages are to be used for DMA, they + * are not mapped into the kernel virtual address space. We store the + * DMA address in the page private. The helper function + * fw_iso_buffer_map() will map the pages into a given vma. + */ +struct fw_iso_buffer { + enum dma_data_direction direction; + struct page **pages; + int page_count; +}; + +typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, + u32 cycle, size_t header_length, + void *header, void *data); + +struct fw_iso_context { + struct fw_card *card; + int type; + int channel; + int speed; + size_t header_size; + fw_iso_callback_t callback; + void *callback_data; +}; + +int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, + int page_count, enum dma_data_direction direction); +int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma); +void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card); + +struct fw_iso_context *fw_iso_context_create(struct fw_card *card, + int type, int channel, int speed, size_t header_size, + fw_iso_callback_t callback, void *callback_data); +int fw_iso_context_queue(struct fw_iso_context *ctx, + struct fw_iso_packet *packet, + struct fw_iso_buffer *buffer, + unsigned long payload); +int fw_iso_context_start(struct fw_iso_context *ctx, + int cycle, int sync, int tags); +int fw_iso_context_stop(struct fw_iso_context *ctx); +void fw_iso_context_destroy(struct fw_iso_context *ctx); + +void fw_iso_resource_manage(struct fw_card *card, int generation, + u64 channels_mask, int *channel, int *bandwidth, bool allocate); + + +/* -topology */ + +enum { + FW_NODE_CREATED, + FW_NODE_UPDATED, + FW_NODE_DESTROYED, + FW_NODE_LINK_ON, + FW_NODE_LINK_OFF, + FW_NODE_INITIATED_RESET, +}; + +struct fw_node { + u16 node_id; + u8 color; + u8 port_count; + u8 link_on:1; + u8 initiated_reset:1; + u8 b_path:1; + u8 phy_speed:2; /* As in the self ID packet. */ + u8 max_speed:2; /* Minimum of all phy-speeds on the path from the + * local node to this node. */ + u8 max_depth:4; /* Maximum depth to any leaf node */ + u8 max_hops:4; /* Max hops in this sub tree */ + atomic_t ref_count; + + /* For serializing node topology into a list. */ + struct list_head link; + + /* Upper layer specific data. */ + void *data; + + struct fw_node *ports[0]; +}; + +static inline struct fw_node *fw_node_get(struct fw_node *node) +{ + atomic_inc(&node->ref_count); + + return node; +} + +static inline void fw_node_put(struct fw_node *node) +{ + if (atomic_dec_and_test(&node->ref_count)) + kfree(node); +} + +void fw_core_handle_bus_reset(struct fw_card *card, int node_id, + int generation, int self_id_count, u32 *self_ids); +void fw_destroy_nodes(struct fw_card *card); + +/* + * Check whether new_generation is the immediate successor of old_generation. + * Take counter roll-over at 255 (as per OHCI) into account. + */ +static inline bool is_next_generation(int new_generation, int old_generation) +{ + return (new_generation & 0xff) == ((old_generation + 1) & 0xff); +} + + +/* -transaction */ + +#define TCODE_IS_READ_REQUEST(tcode) (((tcode) & ~1) == 4) +#define TCODE_IS_BLOCK_PACKET(tcode) (((tcode) & 1) != 0) +#define TCODE_IS_REQUEST(tcode) (((tcode) & 2) == 0) +#define TCODE_IS_RESPONSE(tcode) (((tcode) & 2) != 0) +#define TCODE_HAS_REQUEST_DATA(tcode) (((tcode) & 12) != 4) +#define TCODE_HAS_RESPONSE_DATA(tcode) (((tcode) & 12) != 0) + +#define LOCAL_BUS 0xffc0 + +void fw_core_handle_request(struct fw_card *card, struct fw_packet *request); +void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet); +void fw_fill_response(struct fw_packet *response, u32 *request_header, + int rcode, void *payload, size_t length); +void fw_flush_transactions(struct fw_card *card); +void fw_send_phy_config(struct fw_card *card, + int node_id, int generation, int gap_count); + +static inline int fw_stream_packet_destination_id(int tag, int channel, int sy) +{ + return tag << 14 | channel << 8 | sy; +} + +#endif /* _FIREWIRE_CORE_H */ diff --git a/drivers/firewire/fw-card.c b/drivers/firewire/fw-card.c index b6f55e262e7..ba6cd70b851 100644 --- a/drivers/firewire/fw-card.c +++ b/drivers/firewire/fw-card.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -34,9 +36,7 @@ #include #include -#include "fw-device.h" -#include "fw-topology.h" -#include "fw-transaction.h" +#include "core.h" int fw_compute_block_crc(u32 *block) { diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c index 8a5e6ae2552..042c0454047 100644 --- a/drivers/firewire/fw-cdev.c +++ b/drivers/firewire/fw-cdev.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -41,9 +42,7 @@ #include #include -#include "fw-device.h" -#include "fw-topology.h" -#include "fw-transaction.h" +#include "core.h" struct client { u32 version; diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c index 238acac5bad..65d84dd6c1d 100644 --- a/drivers/firewire/fw-device.c +++ b/drivers/firewire/fw-device.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,9 +41,7 @@ #include #include -#include "fw-device.h" -#include "fw-topology.h" -#include "fw-transaction.h" +#include "core.h" void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 * p) { @@ -94,8 +94,9 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv) return 0; device = fw_device(unit->device.parent); + id = container_of(drv, struct fw_driver, driver)->id_table; - for (id = fw_driver(drv)->id_table; id->match_flags != 0; id++) { + for (; id->match_flags != 0; id++) { if (match_unit_directory(unit->directory, id->match_flags, id)) return 1; diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h deleted file mode 100644 index e973c4361f4..00000000000 --- a/drivers/firewire/fw-device.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (C) 2005-2006 Kristian Hoegsberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef __fw_device_h -#define __fw_device_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -enum fw_device_state { - FW_DEVICE_INITIALIZING, - FW_DEVICE_RUNNING, - FW_DEVICE_GONE, - FW_DEVICE_SHUTDOWN, -}; - -struct fw_attribute_group { - struct attribute_group *groups[2]; - struct attribute_group group; - struct attribute *attrs[12]; -}; - -struct fw_node; -struct fw_card; - -/* - * Note, fw_device.generation always has to be read before fw_device.node_id. - * Use SMP memory barriers to ensure this. Otherwise requests will be sent - * to an outdated node_id if the generation was updated in the meantime due - * to a bus reset. - * - * Likewise, fw-core will take care to update .node_id before .generation so - * that whenever fw_device.generation is current WRT the actual bus generation, - * fw_device.node_id is guaranteed to be current too. - * - * The same applies to fw_device.card->node_id vs. fw_device.generation. - * - * fw_device.config_rom and fw_device.config_rom_length may be accessed during - * the lifetime of any fw_unit belonging to the fw_device, before device_del() - * was called on the last fw_unit. Alternatively, they may be accessed while - * holding fw_device_rwsem. - */ -struct fw_device { - atomic_t state; - struct fw_node *node; - int node_id; - int generation; - unsigned max_speed; - struct fw_card *card; - struct device device; - - struct mutex client_list_mutex; - struct list_head client_list; - - u32 *config_rom; - size_t config_rom_length; - int config_rom_retries; - unsigned is_local:1; - unsigned cmc:1; - unsigned bc_implemented:2; - - struct delayed_work work; - struct fw_attribute_group attribute_group; -}; - -static inline struct fw_device *fw_device(struct device *dev) -{ - return container_of(dev, struct fw_device, device); -} - -static inline int fw_device_is_shutdown(struct fw_device *device) -{ - return atomic_read(&device->state) == FW_DEVICE_SHUTDOWN; -} - -static inline struct fw_device *fw_device_get(struct fw_device *device) -{ - get_device(&device->device); - - return device; -} - -static inline void fw_device_put(struct fw_device *device) -{ - put_device(&device->device); -} - -struct fw_device *fw_device_get_by_devt(dev_t devt); -int fw_device_enable_phys_dma(struct fw_device *device); -void fw_device_set_broadcast_channel(struct fw_device *device, int generation); - -void fw_device_cdev_update(struct fw_device *device); -void fw_device_cdev_remove(struct fw_device *device); - -extern struct rw_semaphore fw_device_rwsem; -extern struct idr fw_device_idr; -extern int fw_cdev_major; - -/* - * fw_unit.directory must not be accessed after device_del(&fw_unit.device). - */ -struct fw_unit { - struct device device; - u32 *directory; - struct fw_attribute_group attribute_group; -}; - -static inline struct fw_unit *fw_unit(struct device *dev) -{ - return container_of(dev, struct fw_unit, device); -} - -static inline struct fw_unit *fw_unit_get(struct fw_unit *unit) -{ - get_device(&unit->device); - - return unit; -} - -static inline void fw_unit_put(struct fw_unit *unit) -{ - put_device(&unit->device); -} - -#define CSR_OFFSET 0x40 -#define CSR_LEAF 0x80 -#define CSR_DIRECTORY 0xc0 - -#define CSR_DESCRIPTOR 0x01 -#define CSR_VENDOR 0x03 -#define CSR_HARDWARE_VERSION 0x04 -#define CSR_NODE_CAPABILITIES 0x0c -#define CSR_UNIT 0x11 -#define CSR_SPECIFIER_ID 0x12 -#define CSR_VERSION 0x13 -#define CSR_DEPENDENT_INFO 0x14 -#define CSR_MODEL 0x17 -#define CSR_INSTANCE 0x18 -#define CSR_DIRECTORY_ID 0x20 - -struct fw_csr_iterator { - u32 *p; - u32 *end; -}; - -void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); -int fw_csr_iterator_next(struct fw_csr_iterator *ci, - int *key, int *value); - -struct fw_driver { - struct device_driver driver; - /* Called when the parent device sits through a bus reset. */ - void (*update)(struct fw_unit *unit); - const struct ieee1394_device_id *id_table; -}; - -static inline struct fw_driver *fw_driver(struct device_driver *drv) -{ - return container_of(drv, struct fw_driver, driver); -} - -extern const struct file_operations fw_device_ops; - -#endif /* __fw_device_h */ diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c index 0ff3e9c42eb..28076c892d7 100644 --- a/drivers/firewire/fw-iso.c +++ b/drivers/firewire/fw-iso.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -30,8 +31,7 @@ #include -#include "fw-topology.h" -#include "fw-transaction.h" +#include "core.h" /* * Isochronous DMA context management diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index d296d12909d..ecddd11b797 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -45,8 +46,8 @@ #include #endif -#include "fw-ohci.h" -#include "fw-transaction.h" +#include "core.h" +#include "ohci.h" #define DESCRIPTOR_OUTPUT_MORE 0 #define DESCRIPTOR_OUTPUT_LAST (1 << 12) diff --git a/drivers/firewire/fw-ohci.h b/drivers/firewire/fw-ohci.h deleted file mode 100644 index a2fbb6240ca..00000000000 --- a/drivers/firewire/fw-ohci.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef __fw_ohci_h -#define __fw_ohci_h - -/* OHCI register map */ - -#define OHCI1394_Version 0x000 -#define OHCI1394_GUID_ROM 0x004 -#define OHCI1394_ATRetries 0x008 -#define OHCI1394_CSRData 0x00C -#define OHCI1394_CSRCompareData 0x010 -#define OHCI1394_CSRControl 0x014 -#define OHCI1394_ConfigROMhdr 0x018 -#define OHCI1394_BusID 0x01C -#define OHCI1394_BusOptions 0x020 -#define OHCI1394_GUIDHi 0x024 -#define OHCI1394_GUIDLo 0x028 -#define OHCI1394_ConfigROMmap 0x034 -#define OHCI1394_PostedWriteAddressLo 0x038 -#define OHCI1394_PostedWriteAddressHi 0x03C -#define OHCI1394_VendorID 0x040 -#define OHCI1394_HCControlSet 0x050 -#define OHCI1394_HCControlClear 0x054 -#define OHCI1394_HCControl_BIBimageValid 0x80000000 -#define OHCI1394_HCControl_noByteSwapData 0x40000000 -#define OHCI1394_HCControl_programPhyEnable 0x00800000 -#define OHCI1394_HCControl_aPhyEnhanceEnable 0x00400000 -#define OHCI1394_HCControl_LPS 0x00080000 -#define OHCI1394_HCControl_postedWriteEnable 0x00040000 -#define OHCI1394_HCControl_linkEnable 0x00020000 -#define OHCI1394_HCControl_softReset 0x00010000 -#define OHCI1394_SelfIDBuffer 0x064 -#define OHCI1394_SelfIDCount 0x068 -#define OHCI1394_SelfIDCount_selfIDError 0x80000000 -#define OHCI1394_IRMultiChanMaskHiSet 0x070 -#define OHCI1394_IRMultiChanMaskHiClear 0x074 -#define OHCI1394_IRMultiChanMaskLoSet 0x078 -#define OHCI1394_IRMultiChanMaskLoClear 0x07C -#define OHCI1394_IntEventSet 0x080 -#define OHCI1394_IntEventClear 0x084 -#define OHCI1394_IntMaskSet 0x088 -#define OHCI1394_IntMaskClear 0x08C -#define OHCI1394_IsoXmitIntEventSet 0x090 -#define OHCI1394_IsoXmitIntEventClear 0x094 -#define OHCI1394_IsoXmitIntMaskSet 0x098 -#define OHCI1394_IsoXmitIntMaskClear 0x09C -#define OHCI1394_IsoRecvIntEventSet 0x0A0 -#define OHCI1394_IsoRecvIntEventClear 0x0A4 -#define OHCI1394_IsoRecvIntMaskSet 0x0A8 -#define OHCI1394_IsoRecvIntMaskClear 0x0AC -#define OHCI1394_InitialBandwidthAvailable 0x0B0 -#define OHCI1394_InitialChannelsAvailableHi 0x0B4 -#define OHCI1394_InitialChannelsAvailableLo 0x0B8 -#define OHCI1394_FairnessControl 0x0DC -#define OHCI1394_LinkControlSet 0x0E0 -#define OHCI1394_LinkControlClear 0x0E4 -#define OHCI1394_LinkControl_rcvSelfID (1 << 9) -#define OHCI1394_LinkControl_rcvPhyPkt (1 << 10) -#define OHCI1394_LinkControl_cycleTimerEnable (1 << 20) -#define OHCI1394_LinkControl_cycleMaster (1 << 21) -#define OHCI1394_LinkControl_cycleSource (1 << 22) -#define OHCI1394_NodeID 0x0E8 -#define OHCI1394_NodeID_idValid 0x80000000 -#define OHCI1394_NodeID_nodeNumber 0x0000003f -#define OHCI1394_NodeID_busNumber 0x0000ffc0 -#define OHCI1394_PhyControl 0x0EC -#define OHCI1394_PhyControl_Read(addr) (((addr) << 8) | 0x00008000) -#define OHCI1394_PhyControl_ReadDone 0x80000000 -#define OHCI1394_PhyControl_ReadData(r) (((r) & 0x00ff0000) >> 16) -#define OHCI1394_PhyControl_Write(addr, data) (((addr) << 8) | (data) | 0x00004000) -#define OHCI1394_PhyControl_WriteDone 0x00004000 -#define OHCI1394_IsochronousCycleTimer 0x0F0 -#define OHCI1394_AsReqFilterHiSet 0x100 -#define OHCI1394_AsReqFilterHiClear 0x104 -#define OHCI1394_AsReqFilterLoSet 0x108 -#define OHCI1394_AsReqFilterLoClear 0x10C -#define OHCI1394_PhyReqFilterHiSet 0x110 -#define OHCI1394_PhyReqFilterHiClear 0x114 -#define OHCI1394_PhyReqFilterLoSet 0x118 -#define OHCI1394_PhyReqFilterLoClear 0x11C -#define OHCI1394_PhyUpperBound 0x120 - -#define OHCI1394_AsReqTrContextBase 0x180 -#define OHCI1394_AsReqTrContextControlSet 0x180 -#define OHCI1394_AsReqTrContextControlClear 0x184 -#define OHCI1394_AsReqTrCommandPtr 0x18C - -#define OHCI1394_AsRspTrContextBase 0x1A0 -#define OHCI1394_AsRspTrContextControlSet 0x1A0 -#define OHCI1394_AsRspTrContextControlClear 0x1A4 -#define OHCI1394_AsRspTrCommandPtr 0x1AC - -#define OHCI1394_AsReqRcvContextBase 0x1C0 -#define OHCI1394_AsReqRcvContextControlSet 0x1C0 -#define OHCI1394_AsReqRcvContextControlClear 0x1C4 -#define OHCI1394_AsReqRcvCommandPtr 0x1CC - -#define OHCI1394_AsRspRcvContextBase 0x1E0 -#define OHCI1394_AsRspRcvContextControlSet 0x1E0 -#define OHCI1394_AsRspRcvContextControlClear 0x1E4 -#define OHCI1394_AsRspRcvCommandPtr 0x1EC - -/* Isochronous transmit registers */ -#define OHCI1394_IsoXmitContextBase(n) (0x200 + 16 * (n)) -#define OHCI1394_IsoXmitContextControlSet(n) (0x200 + 16 * (n)) -#define OHCI1394_IsoXmitContextControlClear(n) (0x204 + 16 * (n)) -#define OHCI1394_IsoXmitCommandPtr(n) (0x20C + 16 * (n)) - -/* Isochronous receive registers */ -#define OHCI1394_IsoRcvContextBase(n) (0x400 + 32 * (n)) -#define OHCI1394_IsoRcvContextControlSet(n) (0x400 + 32 * (n)) -#define OHCI1394_IsoRcvContextControlClear(n) (0x404 + 32 * (n)) -#define OHCI1394_IsoRcvCommandPtr(n) (0x40C + 32 * (n)) -#define OHCI1394_IsoRcvContextMatch(n) (0x410 + 32 * (n)) - -/* Interrupts Mask/Events */ -#define OHCI1394_reqTxComplete 0x00000001 -#define OHCI1394_respTxComplete 0x00000002 -#define OHCI1394_ARRQ 0x00000004 -#define OHCI1394_ARRS 0x00000008 -#define OHCI1394_RQPkt 0x00000010 -#define OHCI1394_RSPkt 0x00000020 -#define OHCI1394_isochTx 0x00000040 -#define OHCI1394_isochRx 0x00000080 -#define OHCI1394_postedWriteErr 0x00000100 -#define OHCI1394_lockRespErr 0x00000200 -#define OHCI1394_selfIDComplete 0x00010000 -#define OHCI1394_busReset 0x00020000 -#define OHCI1394_regAccessFail 0x00040000 -#define OHCI1394_phy 0x00080000 -#define OHCI1394_cycleSynch 0x00100000 -#define OHCI1394_cycle64Seconds 0x00200000 -#define OHCI1394_cycleLost 0x00400000 -#define OHCI1394_cycleInconsistent 0x00800000 -#define OHCI1394_unrecoverableError 0x01000000 -#define OHCI1394_cycleTooLong 0x02000000 -#define OHCI1394_phyRegRcvd 0x04000000 -#define OHCI1394_masterIntEnable 0x80000000 - -#define OHCI1394_evt_no_status 0x0 -#define OHCI1394_evt_long_packet 0x2 -#define OHCI1394_evt_missing_ack 0x3 -#define OHCI1394_evt_underrun 0x4 -#define OHCI1394_evt_overrun 0x5 -#define OHCI1394_evt_descriptor_read 0x6 -#define OHCI1394_evt_data_read 0x7 -#define OHCI1394_evt_data_write 0x8 -#define OHCI1394_evt_bus_reset 0x9 -#define OHCI1394_evt_timeout 0xa -#define OHCI1394_evt_tcode_err 0xb -#define OHCI1394_evt_reserved_b 0xc -#define OHCI1394_evt_reserved_c 0xd -#define OHCI1394_evt_unknown 0xe -#define OHCI1394_evt_flushed 0xf - -#define OHCI1394_phy_tcode 0xe - -#endif /* __fw_ohci_h */ diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 027b91f4a6e..d41cb6e455b 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -58,9 +59,6 @@ #include #include -#include "fw-device.h" -#include "fw-transaction.h" - /* * So far only bridges from Oxford Semiconductor are known to support * concurrent logins. Depending on firmware, four or two concurrent logins diff --git a/drivers/firewire/fw-topology.c b/drivers/firewire/fw-topology.c index 6d0ea1bb7e2..fddf2b35893 100644 --- a/drivers/firewire/fw-topology.c +++ b/drivers/firewire/fw-topology.c @@ -20,6 +20,8 @@ #include #include +#include +#include #include #include #include @@ -31,8 +33,7 @@ #include #include -#include "fw-topology.h" -#include "fw-transaction.h" +#include "core.h" #define SELF_ID_PHY_ID(q) (((q) >> 24) & 0x3f) #define SELF_ID_EXTENDED(q) (((q) >> 23) & 0x01) @@ -45,6 +46,11 @@ #define SELF_ID_EXT_SEQUENCE(q) (((q) >> 20) & 0x07) +#define SELFID_PORT_CHILD 0x3 +#define SELFID_PORT_PARENT 0x2 +#define SELFID_PORT_NCONN 0x1 +#define SELFID_PORT_NONE 0x0 + static u32 *count_ports(u32 *sid, int *total_port_count, int *child_port_count) { u32 q; diff --git a/drivers/firewire/fw-topology.h b/drivers/firewire/fw-topology.h deleted file mode 100644 index 3c497bb4fae..00000000000 --- a/drivers/firewire/fw-topology.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2003-2006 Kristian Hoegsberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef __fw_topology_h -#define __fw_topology_h - -#include -#include - -#include - -enum { - FW_NODE_CREATED, - FW_NODE_UPDATED, - FW_NODE_DESTROYED, - FW_NODE_LINK_ON, - FW_NODE_LINK_OFF, - FW_NODE_INITIATED_RESET, -}; - -struct fw_node { - u16 node_id; - u8 color; - u8 port_count; - u8 link_on : 1; - u8 initiated_reset : 1; - u8 b_path : 1; - u8 phy_speed : 2; /* As in the self ID packet. */ - u8 max_speed : 2; /* Minimum of all phy-speeds on the path from the - * local node to this node. */ - u8 max_depth : 4; /* Maximum depth to any leaf node */ - u8 max_hops : 4; /* Max hops in this sub tree */ - atomic_t ref_count; - - /* For serializing node topology into a list. */ - struct list_head link; - - /* Upper layer specific data. */ - void *data; - - struct fw_node *ports[0]; -}; - -static inline struct fw_node *fw_node_get(struct fw_node *node) -{ - atomic_inc(&node->ref_count); - - return node; -} - -static inline void fw_node_put(struct fw_node *node) -{ - if (atomic_dec_and_test(&node->ref_count)) - kfree(node); -} - -struct fw_card; -void fw_destroy_nodes(struct fw_card *card); - -int fw_compute_block_crc(u32 *block); - -#endif /* __fw_topology_h */ diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index 7008214e333..9a6ce9ab2a6 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -38,8 +39,7 @@ #include -#include "fw-device.h" /* for fw_device_ops */ -#include "fw-transaction.h" +#include "core.h" #define HEADER_PRI(pri) ((pri) << 0) #define HEADER_TCODE(tcode) ((tcode) << 4) @@ -64,6 +64,10 @@ #define HEADER_DESTINATION_IS_BROADCAST(q) \ (((q) & HEADER_DESTINATION(0x3f)) == HEADER_DESTINATION(0x3f)) +#define PHY_PACKET_CONFIG 0x0 +#define PHY_PACKET_LINK_ON 0x1 +#define PHY_PACKET_SELF_ID 0x2 + #define PHY_CONFIG_GAP_COUNT(gap_count) (((gap_count) << 16) | (1 << 22)) #define PHY_CONFIG_ROOT_ID(node_id) ((((node_id) & 0x3f) << 24) | (1 << 23)) #define PHY_IDENTIFIER(id) ((id) << 30) diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h deleted file mode 100644 index dfa799068f8..00000000000 --- a/drivers/firewire/fw-transaction.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (C) 2003-2006 Kristian Hoegsberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef __fw_transaction_h -#define __fw_transaction_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define TCODE_IS_READ_REQUEST(tcode) (((tcode) & ~1) == 4) -#define TCODE_IS_BLOCK_PACKET(tcode) (((tcode) & 1) != 0) -#define TCODE_IS_REQUEST(tcode) (((tcode) & 2) == 0) -#define TCODE_IS_RESPONSE(tcode) (((tcode) & 2) != 0) -#define TCODE_HAS_REQUEST_DATA(tcode) (((tcode) & 12) != 4) -#define TCODE_HAS_RESPONSE_DATA(tcode) (((tcode) & 12) != 0) - -#define LOCAL_BUS 0xffc0 - -#define SELFID_PORT_CHILD 0x3 -#define SELFID_PORT_PARENT 0x2 -#define SELFID_PORT_NCONN 0x1 -#define SELFID_PORT_NONE 0x0 - -#define PHY_PACKET_CONFIG 0x0 -#define PHY_PACKET_LINK_ON 0x1 -#define PHY_PACKET_SELF_ID 0x2 - -/* Bit fields _within_ the PHY registers. */ -#define PHY_LINK_ACTIVE 0x80 -#define PHY_CONTENDER 0x40 -#define PHY_BUS_RESET 0x40 -#define PHY_BUS_SHORT_RESET 0x40 - -#define CSR_REGISTER_BASE 0xfffff0000000ULL - -/* register offsets relative to CSR_REGISTER_BASE */ -#define CSR_STATE_CLEAR 0x0 -#define CSR_STATE_SET 0x4 -#define CSR_NODE_IDS 0x8 -#define CSR_RESET_START 0xc -#define CSR_SPLIT_TIMEOUT_HI 0x18 -#define CSR_SPLIT_TIMEOUT_LO 0x1c -#define CSR_CYCLE_TIME 0x200 -#define CSR_BUS_TIME 0x204 -#define CSR_BUSY_TIMEOUT 0x210 -#define CSR_BUS_MANAGER_ID 0x21c -#define CSR_BANDWIDTH_AVAILABLE 0x220 -#define CSR_CHANNELS_AVAILABLE 0x224 -#define CSR_CHANNELS_AVAILABLE_HI 0x224 -#define CSR_CHANNELS_AVAILABLE_LO 0x228 -#define CSR_BROADCAST_CHANNEL 0x234 -#define CSR_CONFIG_ROM 0x400 -#define CSR_CONFIG_ROM_END 0x800 -#define CSR_FCP_COMMAND 0xB00 -#define CSR_FCP_RESPONSE 0xD00 -#define CSR_FCP_END 0xF00 -#define CSR_TOPOLOGY_MAP 0x1000 -#define CSR_TOPOLOGY_MAP_END 0x1400 -#define CSR_SPEED_MAP 0x2000 -#define CSR_SPEED_MAP_END 0x3000 - -#define BANDWIDTH_AVAILABLE_INITIAL 4915 -#define BROADCAST_CHANNEL_INITIAL (1 << 31 | 31) -#define BROADCAST_CHANNEL_VALID (1 << 30) - -#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args) -#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args) - -static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size) -{ - u32 *dst = _dst; - __be32 *src = _src; - int i; - - for (i = 0; i < size / 4; i++) - dst[i] = be32_to_cpu(src[i]); -} - -static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size) -{ - fw_memcpy_from_be32(_dst, _src, size); -} - -struct fw_card; -struct fw_packet; -struct fw_node; -struct fw_request; - -struct fw_descriptor { - struct list_head link; - size_t length; - u32 immediate; - u32 key; - const u32 *data; -}; - -int fw_core_add_descriptor(struct fw_descriptor *desc); -void fw_core_remove_descriptor(struct fw_descriptor *desc); - -typedef void (*fw_packet_callback_t)(struct fw_packet *packet, - struct fw_card *card, int status); - -typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, - void *data, size_t length, - void *callback_data); - -/* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. - */ -typedef void (*fw_address_callback_t)(struct fw_card *card, - struct fw_request *request, - int tcode, int destination, int source, - int generation, int speed, - unsigned long long offset, - void *data, size_t length, - void *callback_data); - -struct fw_packet { - int speed; - int generation; - u32 header[4]; - size_t header_length; - void *payload; - size_t payload_length; - dma_addr_t payload_bus; - u32 timestamp; - - /* - * This callback is called when the packet transmission has - * completed; for successful transmission, the status code is - * the ack received from the destination, otherwise it's a - * negative errno: ENOMEM, ESTALE, ETIMEDOUT, ENODEV, EIO. - * The callback can be called from tasklet context and thus - * must never block. - */ - fw_packet_callback_t callback; - int ack; - struct list_head link; - void *driver_data; -}; - -struct fw_transaction { - int node_id; /* The generation is implied; it is always the current. */ - int tlabel; - int timestamp; - struct list_head link; - - struct fw_packet packet; - - /* - * The data passed to the callback is valid only during the - * callback. - */ - fw_transaction_callback_t callback; - void *callback_data; -}; - -struct fw_address_handler { - u64 offset; - size_t length; - fw_address_callback_t address_callback; - void *callback_data; - struct list_head link; -}; - -struct fw_address_region { - u64 start; - u64 end; -}; - -extern const struct fw_address_region fw_high_memory_region; - -int fw_core_add_address_handler(struct fw_address_handler *handler, - const struct fw_address_region *region); -void fw_core_remove_address_handler(struct fw_address_handler *handler); -void fw_fill_response(struct fw_packet *response, u32 *request_header, - int rcode, void *payload, size_t length); -void fw_send_response(struct fw_card *card, - struct fw_request *request, int rcode); - -extern struct bus_type fw_bus_type; - -struct fw_card { - const struct fw_card_driver *driver; - struct device *device; - struct kref kref; - struct completion done; - - int node_id; - int generation; - int current_tlabel, tlabel_mask; - struct list_head transaction_list; - struct timer_list flush_timer; - unsigned long reset_jiffies; - - unsigned long long guid; - unsigned max_receive; - int link_speed; - int config_rom_generation; - - spinlock_t lock; /* Take this lock when handling the lists in - * this struct. */ - struct fw_node *local_node; - struct fw_node *root_node; - struct fw_node *irm_node; - u8 color; /* must be u8 to match the definition in struct fw_node */ - int gap_count; - bool beta_repeaters_present; - - int index; - - struct list_head link; - - /* Work struct for BM duties. */ - struct delayed_work work; - int bm_retries; - int bm_generation; - - bool broadcast_channel_allocated; - u32 broadcast_channel; - u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; -}; - -static inline struct fw_card *fw_card_get(struct fw_card *card) -{ - kref_get(&card->kref); - - return card; -} - -void fw_card_release(struct kref *kref); - -static inline void fw_card_put(struct fw_card *card) -{ - kref_put(&card->kref, fw_card_release); -} - -extern void fw_schedule_bm_work(struct fw_card *card, unsigned long delay); - -/* - * Check whether new_generation is the immediate successor of old_generation. - * Take counter roll-over at 255 (as per to OHCI) into account. - */ -static inline bool is_next_generation(int new_generation, int old_generation) -{ - return (new_generation & 0xff) == ((old_generation + 1) & 0xff); -} - -/* - * The iso packet format allows for an immediate header/payload part - * stored in 'header' immediately after the packet info plus an - * indirect payload part that is pointer to by the 'payload' field. - * Applications can use one or the other or both to implement simple - * low-bandwidth streaming (e.g. audio) or more advanced - * scatter-gather streaming (e.g. assembling video frame automatically). - */ - -struct fw_iso_packet { - u16 payload_length; /* Length of indirect payload. */ - u32 interrupt : 1; /* Generate interrupt on this packet */ - u32 skip : 1; /* Set to not send packet at all. */ - u32 tag : 2; - u32 sy : 4; - u32 header_length : 8; /* Length of immediate header. */ - u32 header[0]; -}; - -#define FW_ISO_CONTEXT_TRANSMIT 0 -#define FW_ISO_CONTEXT_RECEIVE 1 - -#define FW_ISO_CONTEXT_MATCH_TAG0 1 -#define FW_ISO_CONTEXT_MATCH_TAG1 2 -#define FW_ISO_CONTEXT_MATCH_TAG2 4 -#define FW_ISO_CONTEXT_MATCH_TAG3 8 -#define FW_ISO_CONTEXT_MATCH_ALL_TAGS 15 - -struct fw_iso_context; - -typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, - u32 cycle, size_t header_length, - void *header, void *data); - -/* - * An iso buffer is just a set of pages mapped for DMA in the - * specified direction. Since the pages are to be used for DMA, they - * are not mapped into the kernel virtual address space. We store the - * DMA address in the page private. The helper function - * fw_iso_buffer_map() will map the pages into a given vma. - */ - -struct fw_iso_buffer { - enum dma_data_direction direction; - struct page **pages; - int page_count; -}; - -struct fw_iso_context { - struct fw_card *card; - int type; - int channel; - int speed; - size_t header_size; - fw_iso_callback_t callback; - void *callback_data; -}; - -int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, - int page_count, enum dma_data_direction direction); -int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma); -void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card); - -struct fw_iso_context *fw_iso_context_create(struct fw_card *card, - int type, int channel, int speed, size_t header_size, - fw_iso_callback_t callback, void *callback_data); -int fw_iso_context_queue(struct fw_iso_context *ctx, - struct fw_iso_packet *packet, - struct fw_iso_buffer *buffer, - unsigned long payload); -int fw_iso_context_start(struct fw_iso_context *ctx, - int cycle, int sync, int tags); -int fw_iso_context_stop(struct fw_iso_context *ctx); -void fw_iso_context_destroy(struct fw_iso_context *ctx); - -void fw_iso_resource_manage(struct fw_card *card, int generation, - u64 channels_mask, int *channel, int *bandwidth, bool allocate); - -struct fw_card_driver { - /* - * Enable the given card with the given initial config rom. - * This function is expected to activate the card, and either - * enable the PHY or set the link_on bit and initiate a bus - * reset. - */ - int (*enable)(struct fw_card *card, u32 *config_rom, size_t length); - - int (*update_phy_reg)(struct fw_card *card, int address, - int clear_bits, int set_bits); - - /* - * Update the config rom for an enabled card. This function - * should change the config rom that is presented on the bus - * an initiate a bus reset. - */ - int (*set_config_rom)(struct fw_card *card, - u32 *config_rom, size_t length); - - void (*send_request)(struct fw_card *card, struct fw_packet *packet); - void (*send_response)(struct fw_card *card, struct fw_packet *packet); - /* Calling cancel is valid once a packet has been submitted. */ - int (*cancel_packet)(struct fw_card *card, struct fw_packet *packet); - - /* - * Allow the specified node ID to do direct DMA out and in of - * host memory. The card will disable this for all node when - * a bus reset happens, so driver need to reenable this after - * bus reset. Returns 0 on success, -ENODEV if the card - * doesn't support this, -ESTALE if the generation doesn't - * match. - */ - int (*enable_phys_dma)(struct fw_card *card, - int node_id, int generation); - - u64 (*get_bus_time)(struct fw_card *card); - - struct fw_iso_context * - (*allocate_iso_context)(struct fw_card *card, - int type, int channel, size_t header_size); - void (*free_iso_context)(struct fw_iso_context *ctx); - - int (*start_iso)(struct fw_iso_context *ctx, - s32 cycle, u32 sync, u32 tags); - - int (*queue_iso)(struct fw_iso_context *ctx, - struct fw_iso_packet *packet, - struct fw_iso_buffer *buffer, - unsigned long payload); - - int (*stop_iso)(struct fw_iso_context *ctx); -}; - -int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset); - -void fw_send_request(struct fw_card *card, struct fw_transaction *t, - int tcode, int destination_id, int generation, int speed, - unsigned long long offset, void *payload, size_t length, - fw_transaction_callback_t callback, void *callback_data); -int fw_cancel_transaction(struct fw_card *card, - struct fw_transaction *transaction); -void fw_flush_transactions(struct fw_card *card); -int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, - int generation, int speed, unsigned long long offset, - void *payload, size_t length); -void fw_send_phy_config(struct fw_card *card, - int node_id, int generation, int gap_count); - -static inline int fw_stream_packet_destination_id(int tag, int channel, int sy) -{ - return tag << 14 | channel << 8 | sy; -} - -/* - * Called by the topology code to inform the device code of node - * activity; found, lost, or updated nodes. - */ -void fw_node_event(struct fw_card *card, struct fw_node *node, int event); - -/* API used by card level drivers */ - -void fw_card_initialize(struct fw_card *card, - const struct fw_card_driver *driver, struct device *device); -int fw_card_add(struct fw_card *card, - u32 max_receive, u32 link_speed, u64 guid); -void fw_core_remove_card(struct fw_card *card); -void fw_core_handle_bus_reset(struct fw_card *card, int node_id, - int generation, int self_id_count, u32 *self_ids); -void fw_core_handle_request(struct fw_card *card, struct fw_packet *request); -void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet); - -extern int fw_irm_set_broadcast_channel_register(struct device *dev, - void *data); - -#endif /* __fw_transaction_h */ diff --git a/drivers/firewire/ohci.h b/drivers/firewire/ohci.h new file mode 100644 index 00000000000..ba492d85c51 --- /dev/null +++ b/drivers/firewire/ohci.h @@ -0,0 +1,157 @@ +#ifndef _FIREWIRE_OHCI_H +#define _FIREWIRE_OHCI_H + +/* OHCI register map */ + +#define OHCI1394_Version 0x000 +#define OHCI1394_GUID_ROM 0x004 +#define OHCI1394_ATRetries 0x008 +#define OHCI1394_CSRData 0x00C +#define OHCI1394_CSRCompareData 0x010 +#define OHCI1394_CSRControl 0x014 +#define OHCI1394_ConfigROMhdr 0x018 +#define OHCI1394_BusID 0x01C +#define OHCI1394_BusOptions 0x020 +#define OHCI1394_GUIDHi 0x024 +#define OHCI1394_GUIDLo 0x028 +#define OHCI1394_ConfigROMmap 0x034 +#define OHCI1394_PostedWriteAddressLo 0x038 +#define OHCI1394_PostedWriteAddressHi 0x03C +#define OHCI1394_VendorID 0x040 +#define OHCI1394_HCControlSet 0x050 +#define OHCI1394_HCControlClear 0x054 +#define OHCI1394_HCControl_BIBimageValid 0x80000000 +#define OHCI1394_HCControl_noByteSwapData 0x40000000 +#define OHCI1394_HCControl_programPhyEnable 0x00800000 +#define OHCI1394_HCControl_aPhyEnhanceEnable 0x00400000 +#define OHCI1394_HCControl_LPS 0x00080000 +#define OHCI1394_HCControl_postedWriteEnable 0x00040000 +#define OHCI1394_HCControl_linkEnable 0x00020000 +#define OHCI1394_HCControl_softReset 0x00010000 +#define OHCI1394_SelfIDBuffer 0x064 +#define OHCI1394_SelfIDCount 0x068 +#define OHCI1394_SelfIDCount_selfIDError 0x80000000 +#define OHCI1394_IRMultiChanMaskHiSet 0x070 +#define OHCI1394_IRMultiChanMaskHiClear 0x074 +#define OHCI1394_IRMultiChanMaskLoSet 0x078 +#define OHCI1394_IRMultiChanMaskLoClear 0x07C +#define OHCI1394_IntEventSet 0x080 +#define OHCI1394_IntEventClear 0x084 +#define OHCI1394_IntMaskSet 0x088 +#define OHCI1394_IntMaskClear 0x08C +#define OHCI1394_IsoXmitIntEventSet 0x090 +#define OHCI1394_IsoXmitIntEventClear 0x094 +#define OHCI1394_IsoXmitIntMaskSet 0x098 +#define OHCI1394_IsoXmitIntMaskClear 0x09C +#define OHCI1394_IsoRecvIntEventSet 0x0A0 +#define OHCI1394_IsoRecvIntEventClear 0x0A4 +#define OHCI1394_IsoRecvIntMaskSet 0x0A8 +#define OHCI1394_IsoRecvIntMaskClear 0x0AC +#define OHCI1394_InitialBandwidthAvailable 0x0B0 +#define OHCI1394_InitialChannelsAvailableHi 0x0B4 +#define OHCI1394_InitialChannelsAvailableLo 0x0B8 +#define OHCI1394_FairnessControl 0x0DC +#define OHCI1394_LinkControlSet 0x0E0 +#define OHCI1394_LinkControlClear 0x0E4 +#define OHCI1394_LinkControl_rcvSelfID (1 << 9) +#define OHCI1394_LinkControl_rcvPhyPkt (1 << 10) +#define OHCI1394_LinkControl_cycleTimerEnable (1 << 20) +#define OHCI1394_LinkControl_cycleMaster (1 << 21) +#define OHCI1394_LinkControl_cycleSource (1 << 22) +#define OHCI1394_NodeID 0x0E8 +#define OHCI1394_NodeID_idValid 0x80000000 +#define OHCI1394_NodeID_nodeNumber 0x0000003f +#define OHCI1394_NodeID_busNumber 0x0000ffc0 +#define OHCI1394_PhyControl 0x0EC +#define OHCI1394_PhyControl_Read(addr) (((addr) << 8) | 0x00008000) +#define OHCI1394_PhyControl_ReadDone 0x80000000 +#define OHCI1394_PhyControl_ReadData(r) (((r) & 0x00ff0000) >> 16) +#define OHCI1394_PhyControl_Write(addr, data) (((addr) << 8) | (data) | 0x00004000) +#define OHCI1394_PhyControl_WriteDone 0x00004000 +#define OHCI1394_IsochronousCycleTimer 0x0F0 +#define OHCI1394_AsReqFilterHiSet 0x100 +#define OHCI1394_AsReqFilterHiClear 0x104 +#define OHCI1394_AsReqFilterLoSet 0x108 +#define OHCI1394_AsReqFilterLoClear 0x10C +#define OHCI1394_PhyReqFilterHiSet 0x110 +#define OHCI1394_PhyReqFilterHiClear 0x114 +#define OHCI1394_PhyReqFilterLoSet 0x118 +#define OHCI1394_PhyReqFilterLoClear 0x11C +#define OHCI1394_PhyUpperBound 0x120 + +#define OHCI1394_AsReqTrContextBase 0x180 +#define OHCI1394_AsReqTrContextControlSet 0x180 +#define OHCI1394_AsReqTrContextControlClear 0x184 +#define OHCI1394_AsReqTrCommandPtr 0x18C + +#define OHCI1394_AsRspTrContextBase 0x1A0 +#define OHCI1394_AsRspTrContextControlSet 0x1A0 +#define OHCI1394_AsRspTrContextControlClear 0x1A4 +#define OHCI1394_AsRspTrCommandPtr 0x1AC + +#define OHCI1394_AsReqRcvContextBase 0x1C0 +#define OHCI1394_AsReqRcvContextControlSet 0x1C0 +#define OHCI1394_AsReqRcvContextControlClear 0x1C4 +#define OHCI1394_AsReqRcvCommandPtr 0x1CC + +#define OHCI1394_AsRspRcvContextBase 0x1E0 +#define OHCI1394_AsRspRcvContextControlSet 0x1E0 +#define OHCI1394_AsRspRcvContextControlClear 0x1E4 +#define OHCI1394_AsRspRcvCommandPtr 0x1EC + +/* Isochronous transmit registers */ +#define OHCI1394_IsoXmitContextBase(n) (0x200 + 16 * (n)) +#define OHCI1394_IsoXmitContextControlSet(n) (0x200 + 16 * (n)) +#define OHCI1394_IsoXmitContextControlClear(n) (0x204 + 16 * (n)) +#define OHCI1394_IsoXmitCommandPtr(n) (0x20C + 16 * (n)) + +/* Isochronous receive registers */ +#define OHCI1394_IsoRcvContextBase(n) (0x400 + 32 * (n)) +#define OHCI1394_IsoRcvContextControlSet(n) (0x400 + 32 * (n)) +#define OHCI1394_IsoRcvContextControlClear(n) (0x404 + 32 * (n)) +#define OHCI1394_IsoRcvCommandPtr(n) (0x40C + 32 * (n)) +#define OHCI1394_IsoRcvContextMatch(n) (0x410 + 32 * (n)) + +/* Interrupts Mask/Events */ +#define OHCI1394_reqTxComplete 0x00000001 +#define OHCI1394_respTxComplete 0x00000002 +#define OHCI1394_ARRQ 0x00000004 +#define OHCI1394_ARRS 0x00000008 +#define OHCI1394_RQPkt 0x00000010 +#define OHCI1394_RSPkt 0x00000020 +#define OHCI1394_isochTx 0x00000040 +#define OHCI1394_isochRx 0x00000080 +#define OHCI1394_postedWriteErr 0x00000100 +#define OHCI1394_lockRespErr 0x00000200 +#define OHCI1394_selfIDComplete 0x00010000 +#define OHCI1394_busReset 0x00020000 +#define OHCI1394_regAccessFail 0x00040000 +#define OHCI1394_phy 0x00080000 +#define OHCI1394_cycleSynch 0x00100000 +#define OHCI1394_cycle64Seconds 0x00200000 +#define OHCI1394_cycleLost 0x00400000 +#define OHCI1394_cycleInconsistent 0x00800000 +#define OHCI1394_unrecoverableError 0x01000000 +#define OHCI1394_cycleTooLong 0x02000000 +#define OHCI1394_phyRegRcvd 0x04000000 +#define OHCI1394_masterIntEnable 0x80000000 + +#define OHCI1394_evt_no_status 0x0 +#define OHCI1394_evt_long_packet 0x2 +#define OHCI1394_evt_missing_ack 0x3 +#define OHCI1394_evt_underrun 0x4 +#define OHCI1394_evt_overrun 0x5 +#define OHCI1394_evt_descriptor_read 0x6 +#define OHCI1394_evt_data_read 0x7 +#define OHCI1394_evt_data_write 0x8 +#define OHCI1394_evt_bus_reset 0x9 +#define OHCI1394_evt_timeout 0xa +#define OHCI1394_evt_tcode_err 0xb +#define OHCI1394_evt_reserved_b 0xc +#define OHCI1394_evt_reserved_c 0xd +#define OHCI1394_evt_unknown 0xe +#define OHCI1394_evt_flushed 0xf + +#define OHCI1394_phy_tcode 0xe + +#endif /* _FIREWIRE_OHCI_H */ diff --git a/include/linux/firewire.h b/include/linux/firewire.h new file mode 100644 index 00000000000..e979f9b22cb --- /dev/null +++ b/include/linux/firewire.h @@ -0,0 +1,350 @@ +#ifndef _LINUX_FIREWIRE_H +#define _LINUX_FIREWIRE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args) +#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args) + +static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size) +{ + u32 *dst = _dst; + __be32 *src = _src; + int i; + + for (i = 0; i < size / 4; i++) + dst[i] = be32_to_cpu(src[i]); +} + +static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size) +{ + fw_memcpy_from_be32(_dst, _src, size); +} +#define CSR_REGISTER_BASE 0xfffff0000000ULL + +/* register offsets are relative to CSR_REGISTER_BASE */ +#define CSR_STATE_CLEAR 0x0 +#define CSR_STATE_SET 0x4 +#define CSR_NODE_IDS 0x8 +#define CSR_RESET_START 0xc +#define CSR_SPLIT_TIMEOUT_HI 0x18 +#define CSR_SPLIT_TIMEOUT_LO 0x1c +#define CSR_CYCLE_TIME 0x200 +#define CSR_BUS_TIME 0x204 +#define CSR_BUSY_TIMEOUT 0x210 +#define CSR_BUS_MANAGER_ID 0x21c +#define CSR_BANDWIDTH_AVAILABLE 0x220 +#define CSR_CHANNELS_AVAILABLE 0x224 +#define CSR_CHANNELS_AVAILABLE_HI 0x224 +#define CSR_CHANNELS_AVAILABLE_LO 0x228 +#define CSR_BROADCAST_CHANNEL 0x234 +#define CSR_CONFIG_ROM 0x400 +#define CSR_CONFIG_ROM_END 0x800 +#define CSR_FCP_COMMAND 0xB00 +#define CSR_FCP_RESPONSE 0xD00 +#define CSR_FCP_END 0xF00 +#define CSR_TOPOLOGY_MAP 0x1000 +#define CSR_TOPOLOGY_MAP_END 0x1400 +#define CSR_SPEED_MAP 0x2000 +#define CSR_SPEED_MAP_END 0x3000 + +#define CSR_OFFSET 0x40 +#define CSR_LEAF 0x80 +#define CSR_DIRECTORY 0xc0 + +#define CSR_DESCRIPTOR 0x01 +#define CSR_VENDOR 0x03 +#define CSR_HARDWARE_VERSION 0x04 +#define CSR_NODE_CAPABILITIES 0x0c +#define CSR_UNIT 0x11 +#define CSR_SPECIFIER_ID 0x12 +#define CSR_VERSION 0x13 +#define CSR_DEPENDENT_INFO 0x14 +#define CSR_MODEL 0x17 +#define CSR_INSTANCE 0x18 +#define CSR_DIRECTORY_ID 0x20 + +struct fw_csr_iterator { + u32 *p; + u32 *end; +}; + +void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); +int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); + +extern struct bus_type fw_bus_type; + +struct fw_card_driver; +struct fw_node; + +struct fw_card { + const struct fw_card_driver *driver; + struct device *device; + struct kref kref; + struct completion done; + + int node_id; + int generation; + int current_tlabel, tlabel_mask; + struct list_head transaction_list; + struct timer_list flush_timer; + unsigned long reset_jiffies; + + unsigned long long guid; + unsigned max_receive; + int link_speed; + int config_rom_generation; + + spinlock_t lock; /* Take this lock when handling the lists in + * this struct. */ + struct fw_node *local_node; + struct fw_node *root_node; + struct fw_node *irm_node; + u8 color; /* must be u8 to match the definition in struct fw_node */ + int gap_count; + bool beta_repeaters_present; + + int index; + + struct list_head link; + + /* Work struct for BM duties. */ + struct delayed_work work; + int bm_retries; + int bm_generation; + + bool broadcast_channel_allocated; + u32 broadcast_channel; + u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; +}; + +static inline struct fw_card *fw_card_get(struct fw_card *card) +{ + kref_get(&card->kref); + + return card; +} + +void fw_card_release(struct kref *kref); + +static inline void fw_card_put(struct fw_card *card) +{ + kref_put(&card->kref, fw_card_release); +} + +struct fw_attribute_group { + struct attribute_group *groups[2]; + struct attribute_group group; + struct attribute *attrs[12]; +}; + +enum fw_device_state { + FW_DEVICE_INITIALIZING, + FW_DEVICE_RUNNING, + FW_DEVICE_GONE, + FW_DEVICE_SHUTDOWN, +}; + +/* + * Note, fw_device.generation always has to be read before fw_device.node_id. + * Use SMP memory barriers to ensure this. Otherwise requests will be sent + * to an outdated node_id if the generation was updated in the meantime due + * to a bus reset. + * + * Likewise, fw-core will take care to update .node_id before .generation so + * that whenever fw_device.generation is current WRT the actual bus generation, + * fw_device.node_id is guaranteed to be current too. + * + * The same applies to fw_device.card->node_id vs. fw_device.generation. + * + * fw_device.config_rom and fw_device.config_rom_length may be accessed during + * the lifetime of any fw_unit belonging to the fw_device, before device_del() + * was called on the last fw_unit. Alternatively, they may be accessed while + * holding fw_device_rwsem. + */ +struct fw_device { + atomic_t state; + struct fw_node *node; + int node_id; + int generation; + unsigned max_speed; + struct fw_card *card; + struct device device; + + struct mutex client_list_mutex; + struct list_head client_list; + + u32 *config_rom; + size_t config_rom_length; + int config_rom_retries; + unsigned is_local:1; + unsigned cmc:1; + unsigned bc_implemented:2; + + struct delayed_work work; + struct fw_attribute_group attribute_group; +}; + +static inline struct fw_device *fw_device(struct device *dev) +{ + return container_of(dev, struct fw_device, device); +} + +static inline int fw_device_is_shutdown(struct fw_device *device) +{ + return atomic_read(&device->state) == FW_DEVICE_SHUTDOWN; +} + +static inline struct fw_device *fw_device_get(struct fw_device *device) +{ + get_device(&device->device); + + return device; +} + +static inline void fw_device_put(struct fw_device *device) +{ + put_device(&device->device); +} + +int fw_device_enable_phys_dma(struct fw_device *device); + +/* + * fw_unit.directory must not be accessed after device_del(&fw_unit.device). + */ +struct fw_unit { + struct device device; + u32 *directory; + struct fw_attribute_group attribute_group; +}; + +static inline struct fw_unit *fw_unit(struct device *dev) +{ + return container_of(dev, struct fw_unit, device); +} + +static inline struct fw_unit *fw_unit_get(struct fw_unit *unit) +{ + get_device(&unit->device); + + return unit; +} + +static inline void fw_unit_put(struct fw_unit *unit) +{ + put_device(&unit->device); +} + +struct ieee1394_device_id; + +struct fw_driver { + struct device_driver driver; + /* Called when the parent device sits through a bus reset. */ + void (*update)(struct fw_unit *unit); + const struct ieee1394_device_id *id_table; +}; + +struct fw_packet; +struct fw_request; + +typedef void (*fw_packet_callback_t)(struct fw_packet *packet, + struct fw_card *card, int status); +typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, + void *data, size_t length, + void *callback_data); +/* + * Important note: The callback must guarantee that either fw_send_response() + * or kfree() is called on the @request. + */ +typedef void (*fw_address_callback_t)(struct fw_card *card, + struct fw_request *request, + int tcode, int destination, int source, + int generation, int speed, + unsigned long long offset, + void *data, size_t length, + void *callback_data); + +struct fw_packet { + int speed; + int generation; + u32 header[4]; + size_t header_length; + void *payload; + size_t payload_length; + dma_addr_t payload_bus; + u32 timestamp; + + /* + * This callback is called when the packet transmission has + * completed; for successful transmission, the status code is + * the ack received from the destination, otherwise it's a + * negative errno: ENOMEM, ESTALE, ETIMEDOUT, ENODEV, EIO. + * The callback can be called from tasklet context and thus + * must never block. + */ + fw_packet_callback_t callback; + int ack; + struct list_head link; + void *driver_data; +}; + +struct fw_transaction { + int node_id; /* The generation is implied; it is always the current. */ + int tlabel; + int timestamp; + struct list_head link; + + struct fw_packet packet; + + /* + * The data passed to the callback is valid only during the + * callback. + */ + fw_transaction_callback_t callback; + void *callback_data; +}; + +struct fw_address_handler { + u64 offset; + size_t length; + fw_address_callback_t address_callback; + void *callback_data; + struct list_head link; +}; + +struct fw_address_region { + u64 start; + u64 end; +}; + +extern const struct fw_address_region fw_high_memory_region; + +int fw_core_add_address_handler(struct fw_address_handler *handler, + const struct fw_address_region *region); +void fw_core_remove_address_handler(struct fw_address_handler *handler); +void fw_send_response(struct fw_card *card, + struct fw_request *request, int rcode); +void fw_send_request(struct fw_card *card, struct fw_transaction *t, + int tcode, int destination_id, int generation, int speed, + unsigned long long offset, void *payload, size_t length, + fw_transaction_callback_t callback, void *callback_data); +int fw_cancel_transaction(struct fw_card *card, + struct fw_transaction *transaction); +int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, + int generation, int speed, unsigned long long offset, + void *payload, size_t length); + +#endif /* _LINUX_FIREWIRE_H */ -- cgit v1.2.3-70-g09d2 From 1b8e69662e1a086878bf930a6042daf7f8a076cc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 5 Jun 2009 14:37:23 +0000 Subject: pnp: add PNP resource range checking function Add a PNP resource range check function, indicating whether a resource has been assigned to any device. Signed-off-by: Bjorn Helgaas [apw@canonical.com: fixed up exports et al] Signed-off-by: Andy Whitcroft Signed-off-by: Eric Anholt --- drivers/pnp/resource.c | 18 ++++++++++++++++++ include/linux/pnp.h | 2 ++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index f604061d2bb..ba976542788 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -638,6 +638,24 @@ int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start, } EXPORT_SYMBOL(pnp_possible_config); +int pnp_range_reserved(resource_size_t start, resource_size_t end) +{ + struct pnp_dev *dev; + struct pnp_resource *pnp_res; + resource_size_t *dev_start, *dev_end; + + pnp_for_each_dev(dev) { + list_for_each_entry(pnp_res, &dev->resources, list) { + dev_start = &pnp_res->res.start; + dev_end = &pnp_res->res.end; + if (ranged_conflict(&start, &end, dev_start, dev_end)) + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(pnp_range_reserved); + /* format is: pnp_reserve_irq=irq1[,irq2] .... */ static int __init pnp_setup_reserve_irq(char *str) { diff --git a/include/linux/pnp.h b/include/linux/pnp.h index ca3c8877302..b063c7328ba 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -446,6 +446,7 @@ int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); int pnp_activate_dev(struct pnp_dev *dev); int pnp_disable_dev(struct pnp_dev *dev); +int pnp_range_reserved(resource_size_t start, resource_size_t end); /* protocol helpers */ int pnp_is_active(struct pnp_dev *dev); @@ -476,6 +477,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } -- cgit v1.2.3-70-g09d2 From 3f7440a6b771169e1f11fa582e53a4259b682809 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jun 2009 17:40:04 +0200 Subject: ALSA: Clean up 64bit division functions Replace the house-made div64_32() with the standard div_u64*() functions. Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 74 ----------------------------------------------- sound/core/oss/pcm_oss.c | 5 ++-- sound/core/pcm_lib.c | 3 +- sound/pci/rme9652/hdsp.c | 7 ++--- sound/pci/rme9652/hdspm.c | 4 +-- 5 files changed, 9 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c1729689161..0caf71e1694 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -486,80 +486,6 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream); void snd_pcm_vma_notify_data(void *client, void *data); int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area); -#if BITS_PER_LONG >= 64 - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - *rem = *n % div; - *n /= div; -} - -#elif defined(i386) - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - u_int32_t low, high; - low = *n & 0xffffffff; - high = *n >> 32; - if (high) { - u_int32_t high1 = high % div; - high /= div; - asm("divl %2":"=a" (low), "=d" (*rem):"rm" (div), "a" (low), "d" (high1)); - *n = (u_int64_t)high << 32 | low; - } else { - *n = low / div; - *rem = low % div; - } -} -#else - -static inline void divl(u_int32_t high, u_int32_t low, - u_int32_t div, - u_int32_t *q, u_int32_t *r) -{ - u_int64_t n = (u_int64_t)high << 32 | low; - u_int64_t d = (u_int64_t)div << 31; - u_int32_t q1 = 0; - int c = 32; - while (n > 0xffffffffU) { - q1 <<= 1; - if (n >= d) { - n -= d; - q1 |= 1; - } - d >>= 1; - c--; - } - q1 <<= c; - if (n) { - low = n; - *q = q1 | (low / div); - *r = low % div; - } else { - *r = 0; - *q = q1; - } - return; -} - -static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem) -{ - u_int32_t low, high; - low = *n & 0xffffffff; - high = *n >> 32; - if (high) { - u_int32_t high1 = high % div; - u_int32_t low1 = low; - high /= div; - divl(high1, low1, div, &low, rem); - *n = (u_int64_t)high << 32 | low; - } else { - *n = low / div; - *rem = low % div; - } -} -#endif - /* * PCM library */ diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index dda000b9684..dbe406b8259 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -617,9 +618,7 @@ static long snd_pcm_oss_bytes(struct snd_pcm_substream *substream, long frames) #else { u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes; - u32 rem; - div64_32(&bsize, buffer_size, &rem); - return (long)bsize; + return div_u64(bsize, buffer_size); } #endif } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index d659995ac3a..a7482874c45 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -452,7 +453,7 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b, *r = 0; return UINT_MAX; } - div64_32(&n, c, r); + n = div_u64_rem(n, c, r); if (n >= UINT_MAX) { *r = 0; return UINT_MAX; diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 314e73531bd..bcfdbb5ebc4 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1047,7 +1048,6 @@ static int hdsp_set_interrupt_interval(struct hdsp *s, unsigned int frames) static void hdsp_set_dds_value(struct hdsp *hdsp, int rate) { u64 n; - u32 r; if (rate >= 112000) rate /= 4; @@ -1055,7 +1055,7 @@ static void hdsp_set_dds_value(struct hdsp *hdsp, int rate) rate /= 2; n = DDS_NUMERATOR; - div64_32(&n, rate, &r); + n = div_u64(n, rate); /* n should be less than 2^32 for being written to FREQ register */ snd_BUG_ON(n >> 32); /* HDSP_freqReg and HDSP_resetPointer are the same, so keep the DDS @@ -3097,7 +3097,6 @@ static int snd_hdsp_get_adat_sync_check(struct snd_kcontrol *kcontrol, struct sn static int hdsp_dds_offset(struct hdsp *hdsp) { u64 n; - u32 r; unsigned int dds_value = hdsp->dds_value; int system_sample_rate = hdsp->system_sample_rate; @@ -3109,7 +3108,7 @@ static int hdsp_dds_offset(struct hdsp *hdsp) * dds_value = n / rate * rate = n / dds_value */ - div64_32(&n, dds_value, &r); + n = div_u64(n, dds_value); if (system_sample_rate >= 112000) n *= 4; else if (system_sample_rate >= 56000) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index bac2dc0c5d8..0dce331a2a3 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -831,7 +832,6 @@ static int hdspm_set_interrupt_interval(struct hdspm * s, unsigned int frames) static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) { u64 n; - u32 r; if (rate >= 112000) rate /= 4; @@ -844,7 +844,7 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) */ /* n = 104857600000000ULL; */ /* = 2^20 * 10^8 */ n = 110100480000000ULL; /* Value checked for AES32 and MADI */ - div64_32(&n, rate, &r); + n = div_u64(n, rate); /* n should be less than 2^32 for being written to FREQ register */ snd_BUG_ON(n >> 32); hdspm_write(hdspm, HDSPM_freqReg, (u32)n); -- cgit v1.2.3-70-g09d2 From ac4bcf889469ffbca88f234d3184452886a47905 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 14:44:52 +0200 Subject: perf_counter: Change PERF_SAMPLE_CONFIG into PERF_SAMPLE_ID The purpose of PERF_SAMPLE_CONFIG was to identify the counters, since then we've added counter ids, use those instead. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- kernel/perf_counter.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 40dc0e273d9..9cea32a0655 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -104,7 +104,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_ADDR = 1U << 3, PERF_SAMPLE_GROUP = 1U << 4, PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_CONFIG = 1U << 6, + PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 37a5a241ca7..e75b91a76a5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2392,8 +2392,8 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_CONFIG) { - header.type |= PERF_SAMPLE_CONFIG; + if (sample_type & PERF_SAMPLE_ID) { + header.type |= PERF_SAMPLE_ID; header.size += sizeof(u64); } @@ -2439,8 +2439,8 @@ static void perf_counter_output(struct perf_counter *counter, if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(&handle, addr); - if (sample_type & PERF_SAMPLE_CONFIG) - perf_output_put(&handle, counter->attr.config); + if (sample_type & PERF_SAMPLE_ID) + perf_output_put(&handle, counter->id); if (sample_type & PERF_SAMPLE_CPU) perf_output_put(&handle, cpu_entry); -- cgit v1.2.3-70-g09d2 From 689802b2d0536e72281dc959ab9cb34fb3c304cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 15:05:43 +0200 Subject: perf_counter: Add PERF_SAMPLE_PERIOD In order to allow easy tracking of the period, also provide means of adding it to the sample data. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9cea32a0655..6bc25003973 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_CALLCHAIN = 1U << 5, PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, }; /* @@ -260,6 +261,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; + * u64 id; * u64 sample_period; * }; */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e75b91a76a5..f8390668c39 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2404,6 +2404,11 @@ static void perf_counter_output(struct perf_counter *counter, cpu_entry.cpu = raw_smp_processor_id(); } + if (sample_type & PERF_SAMPLE_PERIOD) { + header.type |= PERF_SAMPLE_PERIOD; + header.size += sizeof(u64); + } + if (sample_type & PERF_SAMPLE_GROUP) { header.type |= PERF_SAMPLE_GROUP; header.size += sizeof(u64) + @@ -2445,6 +2450,9 @@ static void perf_counter_output(struct perf_counter *counter, if (sample_type & PERF_SAMPLE_CPU) perf_output_put(&handle, cpu_entry); + if (sample_type & PERF_SAMPLE_PERIOD) + perf_output_put(&handle, counter->hw.sample_period); + /* * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. */ @@ -2835,6 +2843,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period) struct { struct perf_event_header header; u64 time; + u64 id; u64 period; } freq_event = { .header = { @@ -2843,6 +2852,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period) .size = sizeof(freq_event), }, .time = sched_clock(), + .id = counter->id, .period = period, }; -- cgit v1.2.3-70-g09d2 From 6a24ed6c6082ec65d19331a4bfa30c0512a1a822 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2009 18:01:29 +0200 Subject: perf_counter: Fix frequency adjustment for < HZ Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 +++ kernel/perf_counter.c | 32 +++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bc25003973..4f9d39ecdc0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -373,6 +373,9 @@ struct hw_perf_counter { u64 sample_period; atomic64_t period_left; u64 interrupts; + + u64 freq_count; + u64 freq_interrupts; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f8390668c39..47c92fb927f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1187,8 +1187,9 @@ static void perf_log_period(struct perf_counter *counter, u64 period); static void perf_adjust_freq(struct perf_counter_context *ctx) { struct perf_counter *counter; + struct hw_perf_counter *hwc; u64 interrupts, sample_period; - u64 events, period; + u64 events, period, freq; s64 delta; spin_lock(&ctx->lock); @@ -1196,8 +1197,10 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) if (counter->state != PERF_COUNTER_STATE_ACTIVE) continue; - interrupts = counter->hw.interrupts; - counter->hw.interrupts = 0; + hwc = &counter->hw; + + interrupts = hwc->interrupts; + hwc->interrupts = 0; if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(counter, 1); @@ -1208,20 +1211,35 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) if (!counter->attr.freq || !counter->attr.sample_freq) continue; - events = HZ * interrupts * counter->hw.sample_period; + if (counter->attr.sample_freq < HZ) { + freq = counter->attr.sample_freq; + + hwc->freq_count += freq; + hwc->freq_interrupts += interrupts; + + if (hwc->freq_count < HZ) + continue; + + interrupts = hwc->freq_interrupts; + hwc->freq_interrupts = 0; + hwc->freq_count -= HZ; + } else + freq = HZ; + + events = freq * interrupts * hwc->sample_period; period = div64_u64(events, counter->attr.sample_freq); - delta = (s64)(1 + period - counter->hw.sample_period); + delta = (s64)(1 + period - hwc->sample_period); delta >>= 1; - sample_period = counter->hw.sample_period + delta; + sample_period = hwc->sample_period + delta; if (!sample_period) sample_period = 1; perf_log_period(counter, sample_period); - counter->hw.sample_period = sample_period; + hwc->sample_period = sample_period; } spin_unlock(&ctx->lock); } -- cgit v1.2.3-70-g09d2 From 24430abc88c67e3df2f06c96a6ccd73bda4c92f0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 21 Apr 2009 11:02:01 +0300 Subject: Kill jffs2-user.h This file does not define any kernel-userspace API, all it does it defines few helpers for userspace. Instead, userspace should have a private copy of this file. The main (if not the only) user is the mtd-utils package, but it already has a private copy of this file. This patch also removes references to 'jffs2-user.h' from 'Kbuild' and MAINTAINERS' files. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- MAINTAINERS | 1 - include/mtd/Kbuild | 1 - include/mtd/jffs2-user.h | 34 ---------------------------------- 3 files changed, 36 deletions(-) delete mode 100644 include/mtd/jffs2-user.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 2b349ba4add..3276e1c256b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3171,7 +3171,6 @@ W: http://www.linux-mtd.infradead.org/doc/jffs2.html S: Maintained F: fs/jffs2/ F: include/linux/jffs2.h -F: include/mtd/jffs2-user.h JOURNALLING LAYER FOR BLOCK DEVICES (JBD) P: Stephen Tweedie diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild index 8eb018f9600..192f8fb7d54 100644 --- a/include/mtd/Kbuild +++ b/include/mtd/Kbuild @@ -1,5 +1,4 @@ header-y += inftl-user.h -header-y += jffs2-user.h header-y += mtd-abi.h header-y += mtd-user.h header-y += nftl-user.h diff --git a/include/mtd/jffs2-user.h b/include/mtd/jffs2-user.h deleted file mode 100644 index fa94b0eb67c..00000000000 --- a/include/mtd/jffs2-user.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * JFFS2 definitions for use in user space only - */ - -#ifndef __JFFS2_USER_H__ -#define __JFFS2_USER_H__ - -/* This file is blessed for inclusion by userspace */ -#include -#include -#include -#include - -#undef cpu_to_je16 -#undef cpu_to_je32 -#undef cpu_to_jemode -#undef je16_to_cpu -#undef je32_to_cpu -#undef jemode_to_cpu - -extern int target_endian; - -#define t16(x) ({ __u16 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); }) -#define t32(x) ({ __u32 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); }) - -#define cpu_to_je16(x) ((jint16_t){t16(x)}) -#define cpu_to_je32(x) ((jint32_t){t32(x)}) -#define cpu_to_jemode(x) ((jmode_t){t32(x)}) - -#define je16_to_cpu(x) (t16((x).v16)) -#define je32_to_cpu(x) (t32((x).v32)) -#define jemode_to_cpu(x) (t32((x).m)) - -#endif /* __JFFS2_USER_H__ */ -- cgit v1.2.3-70-g09d2 From ee8f37688966ab1438d0cf42e0cb7c6595d9592c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 5 May 2009 11:04:19 +0300 Subject: mtd: OneNAND: add support for OneNAND manufactured by Numonyx In addition to adding the Numonyx manufacturer code, this patch also ensures 'sync. write' is disabled when reading identification data - something that the Numonyx chip objects to, but the Samsung chip seems to ignore. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/onenand/onenand_base.c | 3 ++- include/linux/mtd/onenand.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 30d6999e5f9..2346857a275 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -2576,6 +2576,7 @@ static void onenand_print_device_info(int device, int version) static const struct onenand_manufacturers onenand_manuf_ids[] = { {ONENAND_MFR_SAMSUNG, "Samsung"}, + {ONENAND_MFR_NUMONYX, "Numonyx"}, }; /** @@ -2621,7 +2622,7 @@ static int onenand_probe(struct mtd_info *mtd) /* Save system configuration 1 */ syscfg = this->read_word(this->base + ONENAND_REG_SYS_CFG1); /* Clear Sync. Burst Read mode to read BootRAM */ - this->write_word((syscfg & ~ONENAND_SYS_CFG1_SYNC_READ), this->base + ONENAND_REG_SYS_CFG1); + this->write_word((syscfg & ~ONENAND_SYS_CFG1_SYNC_READ & ~ONENAND_SYS_CFG1_SYNC_WRITE), this->base + ONENAND_REG_SYS_CFG1); /* Send the command for reading device ID from BootRAM */ this->write_word(ONENAND_CMD_READID, this->base + ONENAND_BOOTRAM); diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 9aa2a9149b5..0fa3ac4ad57 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -176,6 +176,7 @@ struct onenand_chip { * OneNAND Flash Manufacturer ID Codes */ #define ONENAND_MFR_SAMSUNG 0xec +#define ONENAND_MFR_NUMONYX 0x20 /** * struct onenand_manufacturers - NAND Flash Manufacturer ID Structure -- cgit v1.2.3-70-g09d2 From d6fed9e9fc5eefae5be0ecf222bac7e7496e8e74 Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Mon, 11 May 2009 19:28:01 +0100 Subject: mtd: extend plat_nand for (read|write)_buf This patch adds (write|read)_buf callbacks to plat_nand. The NAND on the TS-7800 provisioned by the FPGA allows readw() and readl() to be used which gives a 2.5x speed up. To be able to use this from the plat_nand driver a hook for read_buf (and also write_buf whilst we are in there) need to be made available. This patch adds the hook. Signed-off-by: Alexander Clouter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/nand/plat_nand.c | 2 ++ include/linux/mtd/nand.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c index 28ffd4e8bb2..47a2105b967 100644 --- a/drivers/mtd/nand/plat_nand.c +++ b/drivers/mtd/nand/plat_nand.c @@ -61,6 +61,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev) data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl; data->chip.dev_ready = pdata->ctrl.dev_ready; data->chip.select_chip = pdata->ctrl.select_chip; + data->chip.write_buf = pdata->ctrl.write_buf; + data->chip.read_buf = pdata->ctrl.read_buf; data->chip.chip_delay = pdata->chip.chip_delay; data->chip.options |= pdata->chip.options; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7efb9be3466..0e35375ea79 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -584,6 +584,8 @@ struct platform_nand_chip { * @select_chip: platform specific chip select function * @cmd_ctrl: platform specific function for controlling * ALE/CLE/nCE. Also used to write command and address + * @write_buf: platform specific function for write buffer + * @read_buf: platform specific function for read buffer * @priv: private data to transport driver specific settings * * All fields are optional and depend on the hardware driver requirements @@ -594,6 +596,10 @@ struct platform_nand_ctrl { void (*select_chip)(struct mtd_info *mtd, int chip); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); + void (*write_buf)(struct mtd_info *mtd, + const uint8_t *buf, int len); + void (*read_buf)(struct mtd_info *mtd, + uint8_t *buf, int len); void *priv; }; -- cgit v1.2.3-70-g09d2 From bf95efd41b1a760128eb25402791b0a4941eb655 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 12 May 2009 13:46:58 -0700 Subject: mtd: plat_nand: add platform probe/remove callbacks Add optional probe and remove callbacks to the plat_nand driver. Some platforms may require additional setup, such as configuring the memory controller, before the nand device can be accessed. This patch provides an optional callback to handle this setup as well as a callback to teardown the setup. Signed-off-by: H Hartley Sweeten Tested-by: Alexander Clouter Signed-off-by: Andrew Morton Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/nand/plat_nand.c | 13 +++++++++++-- include/linux/mtd/nand.h | 7 +++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c index 47a2105b967..22e0ce78841 100644 --- a/drivers/mtd/nand/plat_nand.c +++ b/drivers/mtd/nand/plat_nand.c @@ -72,6 +72,13 @@ static int __devinit plat_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); + /* Handle any platform specific setup */ + if (pdata->ctrl.probe) { + res = pdata->ctrl.probe(pdev); + if (res) + goto out; + } + /* Scan to find existance of the device */ if (nand_scan(&data->mtd, 1)) { res = -ENXIO; @@ -101,6 +108,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev) nand_release(&data->mtd); out: + if (pdata->ctrl.remove) + pdata->ctrl.remove(pdev); platform_set_drvdata(pdev, NULL); iounmap(data->io_base); kfree(data); @@ -113,15 +122,15 @@ out: static int __devexit plat_nand_remove(struct platform_device *pdev) { struct plat_nand_data *data = platform_get_drvdata(pdev); -#ifdef CONFIG_MTD_PARTITIONS struct platform_nand_data *pdata = pdev->dev.platform_data; -#endif nand_release(&data->mtd); #ifdef CONFIG_MTD_PARTITIONS if (data->parts && data->parts != pdata->chip.partitions) kfree(data->parts); #endif + if (pdata->ctrl.remove) + pdata->ctrl.remove(pdev); iounmap(data->io_base); kfree(data); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0e35375ea79..7f2d6935655 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -577,8 +577,13 @@ struct platform_nand_chip { void *priv; }; +/* Keep gcc happy */ +struct platform_device; + /** * struct platform_nand_ctrl - controller level device structure + * @probe: platform specific function to probe/setup hardware + * @remove: platform specific function to remove/teardown hardware * @hwcontrol: platform specific hardware control structure * @dev_ready: platform specific function to read ready/busy pin * @select_chip: platform specific chip select function @@ -591,6 +596,8 @@ struct platform_nand_chip { * All fields are optional and depend on the hardware driver requirements */ struct platform_nand_ctrl { + int (*probe)(struct platform_device *pdev); + void (*remove)(struct platform_device *pdev); void (*hwcontrol)(struct mtd_info *mtd, int cmd); int (*dev_ready)(struct mtd_info *mtd); void (*select_chip)(struct mtd_info *mtd, int chip); -- cgit v1.2.3-70-g09d2 From f36e20c01ad0104688f2eaebdf2213e749929c97 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 12 May 2009 13:46:59 -0700 Subject: mtd: plat_nand: allow platform to set partitions Add optional callback to allow platform to initialize partitions. Static partitions on a nand device could vary depending on the size of the device. This patch allows an optional platform callback to be used to setup this partition information at runtime. Scan order is: 1) chip.part_probe_types 2) chip.set_parts 3) chip.partitions 4) full mtd device (fallback for no partitions) Some of the existing nand drivers could possibly be replaced by the plat_nand driver by using this patch. These include autcpu12.c and ts7250.c drivers. Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/nand/plat_nand.c | 2 ++ include/linux/mtd/nand.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c index 22e0ce78841..4e16c6f5bdd 100644 --- a/drivers/mtd/nand/plat_nand.c +++ b/drivers/mtd/nand/plat_nand.c @@ -95,6 +95,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev) return 0; } } + if (pdata->chip.set_parts) + pdata->chip.set_parts(data->mtd.size, &pdata->chip); if (pdata->chip.partitions) { data->parts = pdata->chip.partitions; res = add_mtd_partitions(&data->mtd, data->parts, diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7f2d6935655..4030ebada49 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -563,6 +563,7 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, * @options: Option flags, e.g. 16bit buswidth * @ecclayout: ecc layout info structure * @part_probe_types: NULL-terminated array of probe types + * @set_parts: platform specific function to set partitions * @priv: hardware controller specific settings */ struct platform_nand_chip { @@ -574,6 +575,8 @@ struct platform_nand_chip { int chip_delay; unsigned int options; const char **part_probe_types; + void (*set_parts)(uint64_t size, + struct platform_nand_chip *chip); void *priv; }; -- cgit v1.2.3-70-g09d2 From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 5 Jun 2009 10:36:24 -0700 Subject: IB/mlx4: Add strong ordering to local inval and fast reg work requests The ConnectX Programmer's Reference Manual states that the "SO" bit must be set when posting Fast Register and Local Invalidate send work requests. When this bit is set, the work request will be executed only after all previous work requests on the send queue have been executed. (If the bit is not set, Fast Register and Local Invalidate WQEs may begin execution too early, which violates the defined semantics for these operations) This fixes the issue with NFS/RDMA reported in Signed-off-by: Jack Morgenstein Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++++ include/linux/mlx4/qp.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 20724aee76f..c4a02648c8a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1585,12 +1585,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_LOCAL_INV: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_local_inv_seg(wqe, wr->ex.invalidate_rkey); wqe += sizeof (struct mlx4_wqe_local_inval_seg); size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; case IB_WR_FAST_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_fmr_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_fmr_seg); size += sizeof (struct mlx4_wqe_fmr_seg) / 16; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index bf8f11982da..9f29d86e5dc 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -165,6 +165,7 @@ enum { MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, + MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, }; struct mlx4_wqe_ctrl_seg { -- cgit v1.2.3-70-g09d2 From 5988af2319781bc8e0ce418affec4e09cfa77907 Mon Sep 17 00:00:00 2001 From: Rohit Hagargundgi Date: Tue, 12 May 2009 13:46:57 -0700 Subject: mtd: Flex-OneNAND support Add support for Samsung Flex-OneNAND devices. Flex-OneNAND combines SLC and MLC technologies into a single device. SLC area provides increased reliability and speed, suitable for storing code such as bootloader, kernel and root file system. MLC area provides high density and is suitable for storing user data. SLC and MLC regions can be configured through kernel parameter. [akpm@linux-foundation.org: export flexoand_region and onenand_addr] Signed-off-by: Rohit Hagargundgi Signed-off-by: Kyungmin Park Cc: Vishak G Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- Documentation/kernel-parameters.txt | 10 + drivers/mtd/onenand/onenand_base.c | 857 ++++++++++++++++++++++++++++++++---- drivers/mtd/onenand/onenand_bbt.c | 14 +- drivers/mtd/onenand/onenand_sim.c | 81 +++- include/linux/mtd/onenand.h | 18 + include/linux/mtd/onenand_regs.h | 20 +- 6 files changed, 913 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e87bdbfbcc7..12df135f8af 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1380,6 +1380,16 @@ and is between 256 and 4096 characters. It is defined in the file mtdparts= [MTD] See drivers/mtd/cmdlinepart.c. + onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration + + Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock] + + boundary - index of last SLC block on Flex-OneNAND. + The remaining blocks are configured as MLC blocks. + lock - Configure if Flex-OneNAND boundary should be locked. + Once locked, the boundary cannot be changed. + 1 indicates lock status, 0 indicates unlock status. + mtdset= [ARM] ARM/S3C2412 JIVE boot control diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 2346857a275..8d4c9c25373 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -9,6 +9,10 @@ * auto-placement support, read-while load support, various fixes * Copyright (C) Nokia Corporation, 2007 * + * Vishak G , Rohit Hagargundgi + * Flex-OneNAND support + * Copyright (C) Samsung Electronics, 2008 + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -27,6 +31,30 @@ #include +/* Default Flex-OneNAND boundary and lock respectively */ +static int flex_bdry[MAX_DIES * 2] = { -1, 0, -1, 0 }; + +/** + * onenand_oob_128 - oob info for Flex-Onenand with 4KB page + * For now, we expose only 64 out of 80 ecc bytes + */ +static struct nand_ecclayout onenand_oob_128 = { + .eccbytes = 64, + .eccpos = { + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 102, 103, 104, 105 + }, + .oobfree = { + {2, 4}, {18, 4}, {34, 4}, {50, 4}, + {66, 4}, {82, 4}, {98, 4}, {114, 4} + } +}; + /** * onenand_oob_64 - oob info for large (2KB) page */ @@ -65,6 +93,14 @@ static const unsigned char ffchars[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 48 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 64 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 80 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 96 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 112 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 128 */ }; /** @@ -170,6 +206,70 @@ static int onenand_buffer_address(int dataram1, int sectors, int count) return ((bsa << ONENAND_BSA_SHIFT) | bsc); } +/** + * flexonenand_block- For given address return block number + * @param this - OneNAND device structure + * @param addr - Address for which block number is needed + */ +static unsigned flexonenand_block(struct onenand_chip *this, loff_t addr) +{ + unsigned boundary, blk, die = 0; + + if (ONENAND_IS_DDP(this) && addr >= this->diesize[0]) { + die = 1; + addr -= this->diesize[0]; + } + + boundary = this->boundary[die]; + + blk = addr >> (this->erase_shift - 1); + if (blk > boundary) + blk = (blk + boundary + 1) >> 1; + + blk += die ? this->density_mask : 0; + return blk; +} + +inline unsigned onenand_block(struct onenand_chip *this, loff_t addr) +{ + if (!FLEXONENAND(this)) + return addr >> this->erase_shift; + return flexonenand_block(this, addr); +} + +/** + * flexonenand_addr - Return address of the block + * @this: OneNAND device structure + * @block: Block number on Flex-OneNAND + * + * Return address of the block + */ +static loff_t flexonenand_addr(struct onenand_chip *this, int block) +{ + loff_t ofs = 0; + int die = 0, boundary; + + if (ONENAND_IS_DDP(this) && block >= this->density_mask) { + block -= this->density_mask; + die = 1; + ofs = this->diesize[0]; + } + + boundary = this->boundary[die]; + ofs += (loff_t)block << (this->erase_shift - 1); + if (block > (boundary + 1)) + ofs += (loff_t)(block - boundary - 1) << (this->erase_shift - 1); + return ofs; +} + +loff_t onenand_addr(struct onenand_chip *this, int block) +{ + if (!FLEXONENAND(this)) + return (loff_t)block << this->erase_shift; + return flexonenand_addr(this, block); +} +EXPORT_SYMBOL(onenand_addr); + /** * onenand_get_density - [DEFAULT] Get OneNAND density * @param dev_id OneNAND device ID @@ -182,6 +282,22 @@ static inline int onenand_get_density(int dev_id) return (density & ONENAND_DEVICE_DENSITY_MASK); } +/** + * flexonenand_region - [Flex-OneNAND] Return erase region of addr + * @param mtd MTD device structure + * @param addr address whose erase region needs to be identified + */ +int flexonenand_region(struct mtd_info *mtd, loff_t addr) +{ + int i; + + for (i = 0; i < mtd->numeraseregions; i++) + if (addr < mtd->eraseregions[i].offset) + break; + return i - 1; +} +EXPORT_SYMBOL(flexonenand_region); + /** * onenand_command - [DEFAULT] Send command to OneNAND device * @param mtd MTD device structure @@ -207,16 +323,28 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le page = -1; break; + case FLEXONENAND_CMD_PI_ACCESS: + /* addr contains die index */ + block = addr * this->density_mask; + page = -1; + break; + case ONENAND_CMD_ERASE: case ONENAND_CMD_BUFFERRAM: case ONENAND_CMD_OTP_ACCESS: - block = (int) (addr >> this->erase_shift); + block = onenand_block(this, addr); page = -1; break; + case FLEXONENAND_CMD_READ_PI: + cmd = ONENAND_CMD_READ; + block = addr * this->density_mask; + page = 0; + break; + default: - block = (int) (addr >> this->erase_shift); - page = (int) (addr >> this->page_shift); + block = onenand_block(this, addr); + page = (int) (addr - onenand_addr(this, block)) >> this->page_shift; if (ONENAND_IS_2PLANE(this)) { /* Make the even block number */ @@ -236,7 +364,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le value = onenand_bufferram_address(this, block); this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2); - if (ONENAND_IS_2PLANE(this)) + if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this)) /* It is always BufferRAM0 */ ONENAND_SET_BUFFERRAM0(this); else @@ -258,13 +386,18 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le if (page != -1) { /* Now we use page size operation */ - int sectors = 4, count = 4; + int sectors = 0, count = 0; int dataram; switch (cmd) { + case FLEXONENAND_CMD_RECOVER_LSB: case ONENAND_CMD_READ: case ONENAND_CMD_READOOB: - dataram = ONENAND_SET_NEXT_BUFFERRAM(this); + if (ONENAND_IS_MLC(this)) + /* It is always BufferRAM0 */ + dataram = ONENAND_SET_BUFFERRAM0(this); + else + dataram = ONENAND_SET_NEXT_BUFFERRAM(this); break; default: @@ -292,6 +425,30 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le return 0; } +/** + * onenand_read_ecc - return ecc status + * @param this onenand chip structure + */ +static inline int onenand_read_ecc(struct onenand_chip *this) +{ + int ecc, i, result = 0; + + if (!FLEXONENAND(this)) + return this->read_word(this->base + ONENAND_REG_ECC_STATUS); + + for (i = 0; i < 4; i++) { + ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS + i); + if (likely(!ecc)) + continue; + if (ecc & FLEXONENAND_UNCORRECTABLE_ERROR) + return ONENAND_ECC_2BIT_ALL; + else + result = ONENAND_ECC_1BIT_ALL; + } + + return result; +} + /** * onenand_wait - [DEFAULT] wait until the command is done * @param mtd MTD device structure @@ -331,14 +488,14 @@ static int onenand_wait(struct mtd_info *mtd, int state) * power off recovery (POR) test, it should read ECC status first */ if (interrupt & ONENAND_INT_READ) { - int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS); + int ecc = onenand_read_ecc(this); if (ecc) { if (ecc & ONENAND_ECC_2BIT_ALL) { printk(KERN_ERR "onenand_wait: ECC error = 0x%04x\n", ecc); mtd->ecc_stats.failed++; return -EBADMSG; } else if (ecc & ONENAND_ECC_1BIT_ALL) { - printk(KERN_INFO "onenand_wait: correctable ECC error = 0x%04x\n", ecc); + printk(KERN_DEBUG "onenand_wait: correctable ECC error = 0x%04x\n", ecc); mtd->ecc_stats.corrected++; } } @@ -656,7 +813,7 @@ static int onenand_check_bufferram(struct mtd_info *mtd, loff_t addr) if (found && ONENAND_IS_DDP(this)) { /* Select DataRAM for DDP */ - int block = (int) (addr >> this->erase_shift); + int block = onenand_block(this, addr); int value = onenand_bufferram_address(this, block); this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2); } @@ -815,6 +972,149 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col return 0; } +/** + * onenand_recover_lsb - [Flex-OneNAND] Recover LSB page data + * @param mtd MTD device structure + * @param addr address to recover + * @param status return value from onenand_wait / onenand_bbt_wait + * + * MLC NAND Flash cell has paired pages - LSB page and MSB page. LSB page has + * lower page address and MSB page has higher page address in paired pages. + * If power off occurs during MSB page program, the paired LSB page data can + * become corrupt. LSB page recovery read is a way to read LSB page though page + * data are corrupted. When uncorrectable error occurs as a result of LSB page + * read after power up, issue LSB page recovery read. + */ +static int onenand_recover_lsb(struct mtd_info *mtd, loff_t addr, int status) +{ + struct onenand_chip *this = mtd->priv; + int i; + + /* Recovery is only for Flex-OneNAND */ + if (!FLEXONENAND(this)) + return status; + + /* check if we failed due to uncorrectable error */ + if (status != -EBADMSG && status != ONENAND_BBT_READ_ECC_ERROR) + return status; + + /* check if address lies in MLC region */ + i = flexonenand_region(mtd, addr); + if (mtd->eraseregions[i].erasesize < (1 << this->erase_shift)) + return status; + + /* We are attempting to reread, so decrement stats.failed + * which was incremented by onenand_wait due to read failure + */ + printk(KERN_INFO "onenand_recover_lsb: Attempting to recover from uncorrectable read\n"); + mtd->ecc_stats.failed--; + + /* Issue the LSB page recovery command */ + this->command(mtd, FLEXONENAND_CMD_RECOVER_LSB, addr, this->writesize); + return this->wait(mtd, FL_READING); +} + +/** + * onenand_mlc_read_ops_nolock - MLC OneNAND read main and/or out-of-band + * @param mtd MTD device structure + * @param from offset to read from + * @param ops: oob operation description structure + * + * MLC OneNAND / Flex-OneNAND has 4KB page size and 4KB dataram. + * So, read-while-load is not present. + */ +static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from, + struct mtd_oob_ops *ops) +{ + struct onenand_chip *this = mtd->priv; + struct mtd_ecc_stats stats; + size_t len = ops->len; + size_t ooblen = ops->ooblen; + u_char *buf = ops->datbuf; + u_char *oobbuf = ops->oobbuf; + int read = 0, column, thislen; + int oobread = 0, oobcolumn, thisooblen, oobsize; + int ret = 0; + int writesize = this->writesize; + + DEBUG(MTD_DEBUG_LEVEL3, "onenand_mlc_read_ops_nolock: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len); + + if (ops->mode == MTD_OOB_AUTO) + oobsize = this->ecclayout->oobavail; + else + oobsize = mtd->oobsize; + + oobcolumn = from & (mtd->oobsize - 1); + + /* Do not allow reads past end of device */ + if (from + len > mtd->size) { + printk(KERN_ERR "onenand_mlc_read_ops_nolock: Attempt read beyond end of device\n"); + ops->retlen = 0; + ops->oobretlen = 0; + return -EINVAL; + } + + stats = mtd->ecc_stats; + + while (read < len) { + cond_resched(); + + thislen = min_t(int, writesize, len - read); + + column = from & (writesize - 1); + if (column + thislen > writesize) + thislen = writesize - column; + + if (!onenand_check_bufferram(mtd, from)) { + this->command(mtd, ONENAND_CMD_READ, from, writesize); + + ret = this->wait(mtd, FL_READING); + if (unlikely(ret)) + ret = onenand_recover_lsb(mtd, from, ret); + onenand_update_bufferram(mtd, from, !ret); + if (ret == -EBADMSG) + ret = 0; + } + + this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen); + if (oobbuf) { + thisooblen = oobsize - oobcolumn; + thisooblen = min_t(int, thisooblen, ooblen - oobread); + + if (ops->mode == MTD_OOB_AUTO) + onenand_transfer_auto_oob(mtd, oobbuf, oobcolumn, thisooblen); + else + this->read_bufferram(mtd, ONENAND_SPARERAM, oobbuf, oobcolumn, thisooblen); + oobread += thisooblen; + oobbuf += thisooblen; + oobcolumn = 0; + } + + read += thislen; + if (read == len) + break; + + from += thislen; + buf += thislen; + } + + /* + * Return success, if no ECC failures, else -EBADMSG + * fs driver will take care of that, because + * retlen == desired len and result == -EBADMSG + */ + ops->retlen = read; + ops->oobretlen = oobread; + + if (ret) + return ret; + + if (mtd->ecc_stats.failed - stats.failed) + return -EBADMSG; + + return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; +} + /** * onenand_read_ops_nolock - [OneNAND Interface] OneNAND read main and/or out-of-band * @param mtd MTD device structure @@ -962,7 +1262,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, size_t len = ops->ooblen; mtd_oob_mode_t mode = ops->mode; u_char *buf = ops->oobbuf; - int ret = 0; + int ret = 0, readcmd; from += ops->ooboffs; @@ -993,17 +1293,22 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, stats = mtd->ecc_stats; + readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; + while (read < len) { cond_resched(); thislen = oobsize - column; thislen = min_t(int, thislen, len); - this->command(mtd, ONENAND_CMD_READOOB, from, mtd->oobsize); + this->command(mtd, readcmd, from, mtd->oobsize); onenand_update_bufferram(mtd, from, 0); ret = this->wait(mtd, FL_READING); + if (unlikely(ret)) + ret = onenand_recover_lsb(mtd, from, ret); + if (ret && ret != -EBADMSG) { printk(KERN_ERR "onenand_read_oob_nolock: read failed = 0x%x\n", ret); break; @@ -1053,6 +1358,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { + struct onenand_chip *this = mtd->priv; struct mtd_oob_ops ops = { .len = len, .ooblen = 0, @@ -1062,7 +1368,9 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, int ret; onenand_get_device(mtd, FL_READING); - ret = onenand_read_ops_nolock(mtd, from, &ops); + ret = ONENAND_IS_MLC(this) ? + onenand_mlc_read_ops_nolock(mtd, from, &ops) : + onenand_read_ops_nolock(mtd, from, &ops); onenand_release_device(mtd); *retlen = ops.retlen; @@ -1080,6 +1388,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, static int onenand_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + struct onenand_chip *this = mtd->priv; int ret; switch (ops->mode) { @@ -1094,7 +1403,9 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from, onenand_get_device(mtd, FL_READING); if (ops->datbuf) - ret = onenand_read_ops_nolock(mtd, from, ops); + ret = ONENAND_IS_MLC(this) ? + onenand_mlc_read_ops_nolock(mtd, from, ops) : + onenand_read_ops_nolock(mtd, from, ops); else ret = onenand_read_oob_nolock(mtd, from, ops); onenand_release_device(mtd); @@ -1128,11 +1439,11 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state) ctrl = this->read_word(this->base + ONENAND_REG_CTRL_STATUS); if (interrupt & ONENAND_INT_READ) { - int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS); + int ecc = onenand_read_ecc(this); if (ecc & ONENAND_ECC_2BIT_ALL) { printk(KERN_INFO "onenand_bbt_wait: ecc error = 0x%04x" ", controller error 0x%04x\n", ecc, ctrl); - return ONENAND_BBT_READ_ERROR; + return ONENAND_BBT_READ_ECC_ERROR; } } else { printk(KERN_ERR "onenand_bbt_wait: read timeout!" @@ -1163,7 +1474,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from, { struct onenand_chip *this = mtd->priv; int read = 0, thislen, column; - int ret = 0; + int ret = 0, readcmd; size_t len = ops->ooblen; u_char *buf = ops->oobbuf; @@ -1183,17 +1494,22 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from, column = from & (mtd->oobsize - 1); + readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; + while (read < len) { cond_resched(); thislen = mtd->oobsize - column; thislen = min_t(int, thislen, len); - this->command(mtd, ONENAND_CMD_READOOB, from, mtd->oobsize); + this->command(mtd, readcmd, from, mtd->oobsize); onenand_update_bufferram(mtd, from, 0); ret = onenand_bbt_wait(mtd, FL_READING); + if (unlikely(ret)) + ret = onenand_recover_lsb(mtd, from, ret); + if (ret) break; @@ -1230,9 +1546,11 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to { struct onenand_chip *this = mtd->priv; u_char *oob_buf = this->oob_buf; - int status, i; + int status, i, readcmd; + + readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; - this->command(mtd, ONENAND_CMD_READOOB, to, mtd->oobsize); + this->command(mtd, readcmd, to, mtd->oobsize); onenand_update_bufferram(mtd, to, 0); status = this->wait(mtd, FL_READING); if (status) @@ -1633,7 +1951,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, { struct onenand_chip *this = mtd->priv; int column, ret = 0, oobsize; - int written = 0; + int written = 0, oobcmd; u_char *oobbuf; size_t len = ops->ooblen; const u_char *buf = ops->oobbuf; @@ -1675,6 +1993,8 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, oobbuf = this->oob_buf; + oobcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB; + /* Loop until all data write */ while (written < len) { int thislen = min_t(int, oobsize, len - written); @@ -1692,7 +2012,14 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, memcpy(oobbuf + column, buf, thislen); this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize); - this->command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize); + if (ONENAND_IS_MLC(this)) { + /* Set main area of DataRAM to 0xff*/ + memset(this->page_buf, 0xff, mtd->writesize); + this->write_bufferram(mtd, ONENAND_DATARAM, + this->page_buf, 0, mtd->writesize); + } + + this->command(mtd, oobcmd, to, mtd->oobsize); onenand_update_bufferram(mtd, to, 0); if (ONENAND_IS_2PLANE(this)) { @@ -1815,29 +2142,48 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) { struct onenand_chip *this = mtd->priv; unsigned int block_size; - loff_t addr; - int len; - int ret = 0; + loff_t addr = instr->addr; + loff_t len = instr->len; + int ret = 0, i; + struct mtd_erase_region_info *region = NULL; + loff_t region_end = 0; DEBUG(MTD_DEBUG_LEVEL3, "onenand_erase: start = 0x%012llx, len = %llu\n", (unsigned long long) instr->addr, (unsigned long long) instr->len); - block_size = (1 << this->erase_shift); - - /* Start address must align on block boundary */ - if (unlikely(instr->addr & (block_size - 1))) { - printk(KERN_ERR "onenand_erase: Unaligned address\n"); + /* Do not allow erase past end of device */ + if (unlikely((len + addr) > mtd->size)) { + printk(KERN_ERR "onenand_erase: Erase past end of device\n"); return -EINVAL; } - /* Length must align on block boundary */ - if (unlikely(instr->len & (block_size - 1))) { - printk(KERN_ERR "onenand_erase: Length not block aligned\n"); - return -EINVAL; + if (FLEXONENAND(this)) { + /* Find the eraseregion of this address */ + i = flexonenand_region(mtd, addr); + region = &mtd->eraseregions[i]; + + block_size = region->erasesize; + region_end = region->offset + region->erasesize * region->numblocks; + + /* Start address within region must align on block boundary. + * Erase region's start offset is always block start address. + */ + if (unlikely((addr - region->offset) & (block_size - 1))) { + printk(KERN_ERR "onenand_erase: Unaligned address\n"); + return -EINVAL; + } + } else { + block_size = 1 << this->erase_shift; + + /* Start address must align on block boundary */ + if (unlikely(addr & (block_size - 1))) { + printk(KERN_ERR "onenand_erase: Unaligned address\n"); + return -EINVAL; + } } - /* Do not allow erase past end of device */ - if (unlikely((instr->len + instr->addr) > mtd->size)) { - printk(KERN_ERR "onenand_erase: Erase past end of device\n"); + /* Length must align on block boundary */ + if (unlikely(len & (block_size - 1))) { + printk(KERN_ERR "onenand_erase: Length not block aligned\n"); return -EINVAL; } @@ -1847,9 +2193,6 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) onenand_get_device(mtd, FL_ERASING); /* Loop throught the pages */ - len = instr->len; - addr = instr->addr; - instr->state = MTD_ERASING; while (len) { @@ -1869,7 +2212,8 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) ret = this->wait(mtd, FL_ERASING); /* Check, if it is write protected */ if (ret) { - printk(KERN_ERR "onenand_erase: Failed erase, block %d\n", (unsigned) (addr >> this->erase_shift)); + printk(KERN_ERR "onenand_erase: Failed erase, block %d\n", + onenand_block(this, addr)); instr->state = MTD_ERASE_FAILED; instr->fail_addr = addr; goto erase_exit; @@ -1877,6 +2221,22 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) len -= block_size; addr += block_size; + + if (addr == region_end) { + if (!len) + break; + region++; + + block_size = region->erasesize; + region_end = region->offset + region->erasesize * region->numblocks; + + if (len & (block_size - 1)) { + /* FIXME: This should be handled at MTD partitioning level. */ + printk(KERN_ERR "onenand_erase: Unaligned address\n"); + goto erase_exit; + } + } + } instr->state = MTD_ERASE_DONE; @@ -1955,13 +2315,17 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) int block; /* Get block number */ - block = ((int) ofs) >> bbm->bbt_erase_shift; + block = onenand_block(this, ofs); if (bbm->bbt) bbm->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1); /* We write two bytes, so we dont have to mess with 16 bit access */ ofs += mtd->oobsize + (bbm->badblockpos & ~0x01); - return onenand_write_oob_nolock(mtd, ofs, &ops); + /* FIXME : What to do when marking SLC block in partition + * with MLC erasesize? For now, it is not advisable to + * create partitions containing both SLC and MLC regions. + */ + return onenand_write_oob_nolock(mtd, ofs, &ops); } /** @@ -2005,8 +2369,8 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int int start, end, block, value, status; int wp_status_mask; - start = ofs >> this->erase_shift; - end = len >> this->erase_shift; + start = onenand_block(this, ofs); + end = onenand_block(this, ofs + len) - 1; if (cmd == ONENAND_CMD_LOCK) wp_status_mask = ONENAND_WP_LS; @@ -2018,7 +2382,7 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int /* Set start block address */ this->write_word(start, this->base + ONENAND_REG_START_BLOCK_ADDRESS); /* Set end block address */ - this->write_word(start + end - 1, this->base + ONENAND_REG_END_BLOCK_ADDRESS); + this->write_word(end, this->base + ONENAND_REG_END_BLOCK_ADDRESS); /* Write lock command */ this->command(mtd, cmd, 0, 0); @@ -2039,7 +2403,7 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int } /* Block lock scheme */ - for (block = start; block < start + end; block++) { + for (block = start; block < end + 1; block++) { /* Set block address */ value = onenand_block_address(this, block); this->write_word(value, this->base + ONENAND_REG_START_ADDRESS1); @@ -2147,7 +2511,7 @@ static void onenand_unlock_all(struct mtd_info *mtd) { struct onenand_chip *this = mtd->priv; loff_t ofs = 0; - size_t len = this->chipsize; + loff_t len = mtd->size; if (this->options & ONENAND_HAS_UNLOCK_ALL) { /* Set start block address */ @@ -2168,7 +2532,7 @@ static void onenand_unlock_all(struct mtd_info *mtd) return; /* Workaround for all block unlock in DDP */ - if (ONENAND_IS_DDP(this)) { + if (ONENAND_IS_DDP(this) && !FLEXONENAND(this)) { /* All blocks on another chip */ ofs = this->chipsize >> 1; len = this->chipsize >> 1; @@ -2210,7 +2574,9 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len, this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); this->wait(mtd, FL_OTPING); - ret = onenand_read_ops_nolock(mtd, from, &ops); + ret = ONENAND_IS_MLC(this) ? + onenand_mlc_read_ops_nolock(mtd, from, &ops) : + onenand_read_ops_nolock(mtd, from, &ops); /* Exit OTP access mode */ this->command(mtd, ONENAND_CMD_RESET, 0, 0); @@ -2277,21 +2643,32 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct onenand_chip *this = mtd->priv; - struct mtd_oob_ops ops = { - .mode = MTD_OOB_PLACE, - .ooblen = len, - .oobbuf = buf, - .ooboffs = 0, - }; + struct mtd_oob_ops ops; int ret; /* Enter OTP access mode */ this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); this->wait(mtd, FL_OTPING); - ret = onenand_write_oob_nolock(mtd, from, &ops); - - *retlen = ops.oobretlen; + if (FLEXONENAND(this)) { + /* + * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of + * main area of page 49. + */ + ops.len = mtd->writesize; + ops.ooblen = 0; + ops.datbuf = buf; + ops.oobbuf = NULL; + ret = onenand_write_ops_nolock(mtd, mtd->writesize * 49, &ops); + *retlen = ops.retlen; + } else { + ops.mode = MTD_OOB_PLACE; + ops.ooblen = len; + ops.oobbuf = buf; + ops.ooboffs = 0; + ret = onenand_write_oob_nolock(mtd, from, &ops); + *retlen = ops.oobretlen; + } /* Exit OTP access mode */ this->command(mtd, ONENAND_CMD_RESET, 0, 0); @@ -2475,27 +2852,34 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct onenand_chip *this = mtd->priv; - u_char *oob_buf = this->oob_buf; + u_char *buf = FLEXONENAND(this) ? this->page_buf : this->oob_buf; size_t retlen; int ret; - memset(oob_buf, 0xff, mtd->oobsize); + memset(buf, 0xff, FLEXONENAND(this) ? this->writesize + : mtd->oobsize); /* * Note: OTP lock operation * OTP block : 0xXXFC * 1st block : 0xXXF3 (If chip support) * Both : 0xXXF0 (If chip support) */ - oob_buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC; + if (FLEXONENAND(this)) + buf[FLEXONENAND_OTP_LOCK_OFFSET] = 0xFC; + else + buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC; /* * Write lock mark to 8th word of sector0 of page0 of the spare0. * We write 16 bytes spare area instead of 2 bytes. + * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of + * main area of page 49. */ + from = 0; - len = 16; + len = FLEXONENAND(this) ? mtd->writesize : 16; - ret = onenand_otp_walk(mtd, from, len, &retlen, oob_buf, do_otp_lock, MTD_OTP_USER); + ret = onenand_otp_walk(mtd, from, len, &retlen, buf, do_otp_lock, MTD_OTP_USER); return ret ? : retlen; } @@ -2542,6 +2926,14 @@ static void onenand_check_features(struct mtd_info *mtd) break; } + if (ONENAND_IS_MLC(this)) + this->options &= ~ONENAND_HAS_2PLANE; + + if (FLEXONENAND(this)) { + this->options &= ~ONENAND_HAS_CONT_LOCK; + this->options |= ONENAND_HAS_UNLOCK_ALL; + } + if (this->options & ONENAND_HAS_CONT_LOCK) printk(KERN_DEBUG "Lock scheme is Continuous Lock\n"); if (this->options & ONENAND_HAS_UNLOCK_ALL) @@ -2559,14 +2951,16 @@ static void onenand_check_features(struct mtd_info *mtd) */ static void onenand_print_device_info(int device, int version) { - int vcc, demuxed, ddp, density; + int vcc, demuxed, ddp, density, flexonenand; vcc = device & ONENAND_DEVICE_VCC_MASK; demuxed = device & ONENAND_DEVICE_IS_DEMUX; ddp = device & ONENAND_DEVICE_IS_DDP; density = onenand_get_density(device); - printk(KERN_INFO "%sOneNAND%s %dMB %sV 16-bit (0x%02x)\n", - demuxed ? "" : "Muxed ", + flexonenand = device & DEVICE_IS_FLEXONENAND; + printk(KERN_INFO "%s%sOneNAND%s %dMB %sV 16-bit (0x%02x)\n", + demuxed ? "" : "Muxed ", + flexonenand ? "Flex-" : "", ddp ? "(DDP)" : "", (16 << density), vcc ? "2.65/3.3" : "1.8", @@ -2605,6 +2999,280 @@ static int onenand_check_maf(int manuf) return (i == size); } +/** +* flexonenand_get_boundary - Reads the SLC boundary +* @param onenand_info - onenand info structure +**/ +static int flexonenand_get_boundary(struct mtd_info *mtd) +{ + struct onenand_chip *this = mtd->priv; + unsigned die, bdry; + int ret, syscfg, locked; + + /* Disable ECC */ + syscfg = this->read_word(this->base + ONENAND_REG_SYS_CFG1); + this->write_word((syscfg | 0x0100), this->base + ONENAND_REG_SYS_CFG1); + + for (die = 0; die < this->dies; die++) { + this->command(mtd, FLEXONENAND_CMD_PI_ACCESS, die, 0); + this->wait(mtd, FL_SYNCING); + + this->command(mtd, FLEXONENAND_CMD_READ_PI, die, 0); + ret = this->wait(mtd, FL_READING); + + bdry = this->read_word(this->base + ONENAND_DATARAM); + if ((bdry >> FLEXONENAND_PI_UNLOCK_SHIFT) == 3) + locked = 0; + else + locked = 1; + this->boundary[die] = bdry & FLEXONENAND_PI_MASK; + + this->command(mtd, ONENAND_CMD_RESET, 0, 0); + ret = this->wait(mtd, FL_RESETING); + + printk(KERN_INFO "Die %d boundary: %d%s\n", die, + this->boundary[die], locked ? "(Locked)" : "(Unlocked)"); + } + + /* Enable ECC */ + this->write_word(syscfg, this->base + ONENAND_REG_SYS_CFG1); + return 0; +} + +/** + * flexonenand_get_size - Fill up fields in onenand_chip and mtd_info + * boundary[], diesize[], mtd->size, mtd->erasesize + * @param mtd - MTD device structure + */ +static void flexonenand_get_size(struct mtd_info *mtd) +{ + struct onenand_chip *this = mtd->priv; + int die, i, eraseshift, density; + int blksperdie, maxbdry; + loff_t ofs; + + density = onenand_get_density(this->device_id); + blksperdie = ((loff_t)(16 << density) << 20) >> (this->erase_shift); + blksperdie >>= ONENAND_IS_DDP(this) ? 1 : 0; + maxbdry = blksperdie - 1; + eraseshift = this->erase_shift - 1; + + mtd->numeraseregions = this->dies << 1; + + /* This fills up the device boundary */ + flexonenand_get_boundary(mtd); + die = ofs = 0; + i = -1; + for (; die < this->dies; die++) { + if (!die || this->boundary[die-1] != maxbdry) { + i++; + mtd->eraseregions[i].offset = ofs; + mtd->eraseregions[i].erasesize = 1 << eraseshift; + mtd->eraseregions[i].numblocks = + this->boundary[die] + 1; + ofs += mtd->eraseregions[i].numblocks << eraseshift; + eraseshift++; + } else { + mtd->numeraseregions -= 1; + mtd->eraseregions[i].numblocks += + this->boundary[die] + 1; + ofs += (this->boundary[die] + 1) << (eraseshift - 1); + } + if (this->boundary[die] != maxbdry) { + i++; + mtd->eraseregions[i].offset = ofs; + mtd->eraseregions[i].erasesize = 1 << eraseshift; + mtd->eraseregions[i].numblocks = maxbdry ^ + this->boundary[die]; + ofs += mtd->eraseregions[i].numblocks << eraseshift; + eraseshift--; + } else + mtd->numeraseregions -= 1; + } + + /* Expose MLC erase size except when all blocks are SLC */ + mtd->erasesize = 1 << this->erase_shift; + if (mtd->numeraseregions == 1) + mtd->erasesize >>= 1; + + printk(KERN_INFO "Device has %d eraseregions\n", mtd->numeraseregions); + for (i = 0; i < mtd->numeraseregions; i++) + printk(KERN_INFO "[offset: 0x%08x, erasesize: 0x%05x," + " numblocks: %04u]\n", + (unsigned int) mtd->eraseregions[i].offset, + mtd->eraseregions[i].erasesize, + mtd->eraseregions[i].numblocks); + + for (die = 0, mtd->size = 0; die < this->dies; die++) { + this->diesize[die] = (loff_t)blksperdie << this->erase_shift; + this->diesize[die] -= (loff_t)(this->boundary[die] + 1) + << (this->erase_shift - 1); + mtd->size += this->diesize[die]; + } +} + +/** + * flexonenand_check_blocks_erased - Check if blocks are erased + * @param mtd_info - mtd info structure + * @param start - first erase block to check + * @param end - last erase block to check + * + * Converting an unerased block from MLC to SLC + * causes byte values to change. Since both data and its ECC + * have changed, reads on the block give uncorrectable error. + * This might lead to the block being detected as bad. + * + * Avoid this by ensuring that the block to be converted is + * erased. + */ +static int flexonenand_check_blocks_erased(struct mtd_info *mtd, int start, int end) +{ + struct onenand_chip *this = mtd->priv; + int i, ret; + int block; + struct mtd_oob_ops ops = { + .mode = MTD_OOB_PLACE, + .ooboffs = 0, + .ooblen = mtd->oobsize, + .datbuf = NULL, + .oobbuf = this->oob_buf, + }; + loff_t addr; + + printk(KERN_DEBUG "Check blocks from %d to %d\n", start, end); + + for (block = start; block <= end; block++) { + addr = flexonenand_addr(this, block); + if (onenand_block_isbad_nolock(mtd, addr, 0)) + continue; + + /* + * Since main area write results in ECC write to spare, + * it is sufficient to check only ECC bytes for change. + */ + ret = onenand_read_oob_nolock(mtd, addr, &ops); + if (ret) + return ret; + + for (i = 0; i < mtd->oobsize; i++) + if (this->oob_buf[i] != 0xff) + break; + + if (i != mtd->oobsize) { + printk(KERN_WARNING "Block %d not erased.\n", block); + return 1; + } + } + + return 0; +} + +/** + * flexonenand_set_boundary - Writes the SLC boundary + * @param mtd - mtd info structure + */ +int flexonenand_set_boundary(struct mtd_info *mtd, int die, + int boundary, int lock) +{ + struct onenand_chip *this = mtd->priv; + int ret, density, blksperdie, old, new, thisboundary; + loff_t addr; + + /* Change only once for SDP Flex-OneNAND */ + if (die && (!ONENAND_IS_DDP(this))) + return 0; + + /* boundary value of -1 indicates no required change */ + if (boundary < 0 || boundary == this->boundary[die]) + return 0; + + density = onenand_get_density(this->device_id); + blksperdie = ((16 << density) << 20) >> this->erase_shift; + blksperdie >>= ONENAND_IS_DDP(this) ? 1 : 0; + + if (boundary >= blksperdie) { + printk(KERN_ERR "flexonenand_set_boundary: Invalid boundary value. " + "Boundary not changed.\n"); + return -EINVAL; + } + + /* Check if converting blocks are erased */ + old = this->boundary[die] + (die * this->density_mask); + new = boundary + (die * this->density_mask); + ret = flexonenand_check_blocks_erased(mtd, min(old, new) + 1, max(old, new)); + if (ret) { + printk(KERN_ERR "flexonenand_set_boundary: Please erase blocks before boundary change\n"); + return ret; + } + + this->command(mtd, FLEXONENAND_CMD_PI_ACCESS, die, 0); + this->wait(mtd, FL_SYNCING); + + /* Check is boundary is locked */ + this->command(mtd, FLEXONENAND_CMD_READ_PI, die, 0); + ret = this->wait(mtd, FL_READING); + + thisboundary = this->read_word(this->base + ONENAND_DATARAM); + if ((thisboundary >> FLEXONENAND_PI_UNLOCK_SHIFT) != 3) { + printk(KERN_ERR "flexonenand_set_boundary: boundary locked\n"); + ret = 1; + goto out; + } + + printk(KERN_INFO "flexonenand_set_boundary: Changing die %d boundary: %d%s\n", + die, boundary, lock ? "(Locked)" : "(Unlocked)"); + + addr = die ? this->diesize[0] : 0; + + boundary &= FLEXONENAND_PI_MASK; + boundary |= lock ? 0 : (3 << FLEXONENAND_PI_UNLOCK_SHIFT); + + this->command(mtd, ONENAND_CMD_ERASE, addr, 0); + ret = this->wait(mtd, FL_ERASING); + if (ret) { + printk(KERN_ERR "flexonenand_set_boundary: Failed PI erase for Die %d\n", die); + goto out; + } + + this->write_word(boundary, this->base + ONENAND_DATARAM); + this->command(mtd, ONENAND_CMD_PROG, addr, 0); + ret = this->wait(mtd, FL_WRITING); + if (ret) { + printk(KERN_ERR "flexonenand_set_boundary: Failed PI write for Die %d\n", die); + goto out; + } + + this->command(mtd, FLEXONENAND_CMD_PI_UPDATE, die, 0); + ret = this->wait(mtd, FL_WRITING); +out: + this->write_word(ONENAND_CMD_RESET, this->base + ONENAND_REG_COMMAND); + this->wait(mtd, FL_RESETING); + if (!ret) + /* Recalculate device size on boundary change*/ + flexonenand_get_size(mtd); + + return ret; +} + +/** + * flexonenand_setup - capture Flex-OneNAND boundary and lock + * values passed as kernel parameters + * @param s kernel parameter string + */ +static int flexonenand_setup(char *s) +{ + int ints[5], i; + + s = get_options(s, 5, ints); + + for (i = 0; i < ints[0]; i++) + flex_bdry[i] = ints[i + 1]; + + return 1; +} + +__setup("onenand.bdry=", flexonenand_setup); + /** * onenand_probe - [OneNAND Interface] Probe the OneNAND device * @param mtd MTD device structure @@ -2647,6 +3315,7 @@ static int onenand_probe(struct mtd_info *mtd) maf_id = this->read_word(this->base + ONENAND_REG_MANUFACTURER_ID); dev_id = this->read_word(this->base + ONENAND_REG_DEVICE_ID); ver_id = this->read_word(this->base + ONENAND_REG_VERSION_ID); + this->technology = this->read_word(this->base + ONENAND_REG_TECHNOLOGY); /* Check OneNAND device */ if (maf_id != bram_maf_id || dev_id != bram_dev_id) @@ -2658,29 +3327,55 @@ static int onenand_probe(struct mtd_info *mtd) this->version_id = ver_id; density = onenand_get_density(dev_id); + if (FLEXONENAND(this)) { + this->dies = ONENAND_IS_DDP(this) ? 2 : 1; + /* Maximum possible erase regions */ + mtd->numeraseregions = this->dies << 1; + mtd->eraseregions = kzalloc(sizeof(struct mtd_erase_region_info) + * (this->dies << 1), GFP_KERNEL); + if (!mtd->eraseregions) + return -ENOMEM; + } + + /* + * For Flex-OneNAND, chipsize represents maximum possible device size. + * mtd->size represents the actual device size. + */ this->chipsize = (16 << density) << 20; - /* Set density mask. it is used for DDP */ - if (ONENAND_IS_DDP(this)) - this->density_mask = (1 << (density + 6)); - else - this->density_mask = 0; /* OneNAND page size & block size */ /* The data buffer size is equal to page size */ mtd->writesize = this->read_word(this->base + ONENAND_REG_DATA_BUFFER_SIZE); + /* We use the full BufferRAM */ + if (ONENAND_IS_MLC(this)) + mtd->writesize <<= 1; + mtd->oobsize = mtd->writesize >> 5; /* Pages per a block are always 64 in OneNAND */ mtd->erasesize = mtd->writesize << 6; + /* + * Flex-OneNAND SLC area has 64 pages per block. + * Flex-OneNAND MLC area has 128 pages per block. + * Expose MLC erase size to find erase_shift and page_mask. + */ + if (FLEXONENAND(this)) + mtd->erasesize <<= 1; this->erase_shift = ffs(mtd->erasesize) - 1; this->page_shift = ffs(mtd->writesize) - 1; this->page_mask = (1 << (this->erase_shift - this->page_shift)) - 1; + /* Set density mask. it is used for DDP */ + if (ONENAND_IS_DDP(this)) + this->density_mask = this->chipsize >> (this->erase_shift + 1); /* It's real page size */ this->writesize = mtd->writesize; /* REVIST: Multichip handling */ - mtd->size = this->chipsize; + if (FLEXONENAND(this)) + flexonenand_get_size(mtd); + else + mtd->size = this->chipsize; /* Check OneNAND features */ onenand_check_features(mtd); @@ -2735,7 +3430,7 @@ static void onenand_resume(struct mtd_info *mtd) */ int onenand_scan(struct mtd_info *mtd, int maxchips) { - int i; + int i, ret; struct onenand_chip *this = mtd->priv; if (!this->read_word) @@ -2797,6 +3492,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) * Allow subpage writes up to oobsize. */ switch (mtd->oobsize) { + case 128: + this->ecclayout = &onenand_oob_128; + mtd->subpage_sft = 0; + break; case 64: this->ecclayout = &onenand_oob_64; mtd->subpage_sft = 2; @@ -2862,7 +3561,16 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) /* Unlock whole block */ onenand_unlock_all(mtd); - return this->scan_bbt(mtd); + ret = this->scan_bbt(mtd); + if ((!FLEXONENAND(this)) || ret) + return ret; + + /* Change Flex-OneNAND boundaries if required */ + for (i = 0; i < MAX_DIES; i++) + flexonenand_set_boundary(mtd, i, flex_bdry[2 * i], + flex_bdry[(2 * i) + 1]); + + return 0; } /** @@ -2891,6 +3599,7 @@ void onenand_release(struct mtd_info *mtd) kfree(this->page_buf); if (this->options & ONENAND_OOBBUF_ALLOC) kfree(this->oob_buf); + kfree(mtd->eraseregions); } EXPORT_SYMBOL_GPL(onenand_scan); diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c index 2f53b51c680..a91fcac1af0 100644 --- a/drivers/mtd/onenand/onenand_bbt.c +++ b/drivers/mtd/onenand/onenand_bbt.c @@ -63,6 +63,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr loff_t from; size_t readlen, ooblen; struct mtd_oob_ops ops; + int rgn; printk(KERN_INFO "Scanning device for bad blocks\n"); @@ -76,7 +77,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr /* Note that numblocks is 2 * (real numblocks) here; * see i += 2 below as it makses shifting and masking less painful */ - numblocks = mtd->size >> (bbm->bbt_erase_shift - 1); + numblocks = this->chipsize >> (bbm->bbt_erase_shift - 1); startblock = 0; from = 0; @@ -106,7 +107,12 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr } } i += 2; - from += (1 << bbm->bbt_erase_shift); + + if (FLEXONENAND(this)) { + rgn = flexonenand_region(mtd, from); + from += mtd->eraseregions[rgn].erasesize; + } else + from += (1 << bbm->bbt_erase_shift); } return 0; @@ -143,7 +149,7 @@ static int onenand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt) uint8_t res; /* Get block number * 2 */ - block = (int) (offs >> (bbm->bbt_erase_shift - 1)); + block = (int) (onenand_block(this, offs) << 1); res = (bbm->bbt[block >> 3] >> (block & 0x06)) & 0x03; DEBUG(MTD_DEBUG_LEVEL2, "onenand_isbad_bbt: bbt info for offs 0x%08x: (block %d) 0x%02x\n", @@ -178,7 +184,7 @@ int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd) struct bbm_info *bbm = this->bbm; int len, ret = 0; - len = mtd->size >> (this->erase_shift + 2); + len = this->chipsize >> (this->erase_shift + 2); /* Allocate memory (2bit per block) and clear the memory bad block table */ bbm->bbt = kzalloc(len, GFP_KERNEL); if (!bbm->bbt) { diff --git a/drivers/mtd/onenand/onenand_sim.c b/drivers/mtd/onenand/onenand_sim.c index d64200b7c94..f6e3c8aebd3 100644 --- a/drivers/mtd/onenand/onenand_sim.c +++ b/drivers/mtd/onenand/onenand_sim.c @@ -6,6 +6,10 @@ * Copyright © 2005-2007 Samsung Electronics * Kyungmin Park * + * Vishak G , Rohit Hagargundgi + * Flex-OneNAND simulator support + * Copyright (C) Samsung Electronics, 2008 + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -24,16 +28,38 @@ #ifndef CONFIG_ONENAND_SIM_MANUFACTURER #define CONFIG_ONENAND_SIM_MANUFACTURER 0xec #endif + #ifndef CONFIG_ONENAND_SIM_DEVICE_ID #define CONFIG_ONENAND_SIM_DEVICE_ID 0x04 #endif + +#define CONFIG_FLEXONENAND ((CONFIG_ONENAND_SIM_DEVICE_ID >> 9) & 1) + #ifndef CONFIG_ONENAND_SIM_VERSION_ID #define CONFIG_ONENAND_SIM_VERSION_ID 0x1e #endif +#ifndef CONFIG_ONENAND_SIM_TECHNOLOGY_ID +#define CONFIG_ONENAND_SIM_TECHNOLOGY_ID CONFIG_FLEXONENAND +#endif + +/* Initial boundary values for Flex-OneNAND Simulator */ +#ifndef CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY +#define CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY 0x01 +#endif + +#ifndef CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY +#define CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY 0x01 +#endif + static int manuf_id = CONFIG_ONENAND_SIM_MANUFACTURER; static int device_id = CONFIG_ONENAND_SIM_DEVICE_ID; static int version_id = CONFIG_ONENAND_SIM_VERSION_ID; +static int technology_id = CONFIG_ONENAND_SIM_TECHNOLOGY_ID; +static int boundary[] = { + CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY, + CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY, +}; struct onenand_flash { void __iomem *base; @@ -57,12 +83,18 @@ struct onenand_flash { (writew(v, this->base + ONENAND_REG_WP_STATUS)) /* It has all 0xff chars */ -#define MAX_ONENAND_PAGESIZE (2048 + 64) +#define MAX_ONENAND_PAGESIZE (4096 + 128) static unsigned char *ffchars; +#if CONFIG_FLEXONENAND +#define PARTITION_NAME "Flex-OneNAND simulator partition" +#else +#define PARTITION_NAME "OneNAND simulator partition" +#endif + static struct mtd_partition os_partitions[] = { { - .name = "OneNAND simulator partition", + .name = PARTITION_NAME, .offset = 0, .size = MTDPART_SIZ_FULL, }, @@ -104,6 +136,7 @@ static void onenand_lock_handle(struct onenand_chip *this, int cmd) switch (cmd) { case ONENAND_CMD_UNLOCK: + case ONENAND_CMD_UNLOCK_ALL: if (block_lock_scheme) ONENAND_SET_WP_STATUS(ONENAND_WP_US, this); else @@ -228,10 +261,12 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd, { struct mtd_info *mtd = &info->mtd; struct onenand_flash *flash = this->priv; - int main_offset, spare_offset; + int main_offset, spare_offset, die = 0; void __iomem *src; void __iomem *dest; unsigned int i; + static int pi_operation; + int erasesize, rgn; if (dataram) { main_offset = mtd->writesize; @@ -241,10 +276,27 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd, spare_offset = 0; } + if (pi_operation) { + die = readw(this->base + ONENAND_REG_START_ADDRESS2); + die >>= ONENAND_DDP_SHIFT; + } + switch (cmd) { + case FLEXONENAND_CMD_PI_ACCESS: + pi_operation = 1; + break; + + case ONENAND_CMD_RESET: + pi_operation = 0; + break; + case ONENAND_CMD_READ: src = ONENAND_CORE(flash) + offset; dest = ONENAND_MAIN_AREA(this, main_offset); + if (pi_operation) { + writew(boundary[die], this->base + ONENAND_DATARAM); + break; + } memcpy(dest, src, mtd->writesize); /* Fall through */ @@ -257,6 +309,10 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd, case ONENAND_CMD_PROG: src = ONENAND_MAIN_AREA(this, main_offset); dest = ONENAND_CORE(flash) + offset; + if (pi_operation) { + boundary[die] = readw(this->base + ONENAND_DATARAM); + break; + } /* To handle partial write */ for (i = 0; i < (1 << mtd->subpage_sft); i++) { int off = i * this->subpagesize; @@ -284,9 +340,18 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd, break; case ONENAND_CMD_ERASE: - memset(ONENAND_CORE(flash) + offset, 0xff, mtd->erasesize); + if (pi_operation) + break; + + if (FLEXONENAND(this)) { + rgn = flexonenand_region(mtd, offset); + erasesize = mtd->eraseregions[rgn].erasesize; + } else + erasesize = mtd->erasesize; + + memset(ONENAND_CORE(flash) + offset, 0xff, erasesize); memset(ONENAND_CORE_SPARE(flash, this, offset), 0xff, - (mtd->erasesize >> 5)); + (erasesize >> 5)); break; default: @@ -339,7 +404,7 @@ static void onenand_command_handle(struct onenand_chip *this, int cmd) } if (block != -1) - offset += block << this->erase_shift; + offset = onenand_addr(this, block); if (page != -1) offset += page << this->page_shift; @@ -390,6 +455,7 @@ static int __init flash_init(struct onenand_flash *flash) } density = device_id >> ONENAND_DEVICE_DENSITY_SHIFT; + density &= ONENAND_DEVICE_DENSITY_MASK; size = ((16 << 20) << density); ONENAND_CORE(flash) = vmalloc(size + (size >> 5)); @@ -405,8 +471,9 @@ static int __init flash_init(struct onenand_flash *flash) writew(manuf_id, flash->base + ONENAND_REG_MANUFACTURER_ID); writew(device_id, flash->base + ONENAND_REG_DEVICE_ID); writew(version_id, flash->base + ONENAND_REG_VERSION_ID); + writew(technology_id, flash->base + ONENAND_REG_TECHNOLOGY); - if (density < 2) + if (density < 2 && (!CONFIG_FLEXONENAND)) buffer_size = 0x0400; /* 1KiB page */ else buffer_size = 0x0800; /* 2KiB page */ diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 0fa3ac4ad57..9aab82c1c74 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -17,6 +17,7 @@ #include #include +#define MAX_DIES 2 #define MAX_BUFFERRAM 2 /* Scan and identify a OneNAND device */ @@ -51,7 +52,12 @@ struct onenand_bufferram { /** * struct onenand_chip - OneNAND Private Flash Chip Data * @base: [BOARDSPECIFIC] address to access OneNAND + * @dies: [INTERN][FLEX-ONENAND] number of dies on chip + * @boundary: [INTERN][FLEX-ONENAND] Boundary of the dies + * @diesize: [INTERN][FLEX-ONENAND] Size of the dies * @chipsize: [INTERN] the size of one chip for multichip arrays + * FIXME For Flex-OneNAND, chipsize holds maximum possible + * device size ie when all blocks are considered MLC * @device_id: [INTERN] device ID * @density_mask: chip density, used for DDP devices * @verstion_id: [INTERN] version ID @@ -92,9 +98,13 @@ struct onenand_bufferram { */ struct onenand_chip { void __iomem *base; + unsigned dies; + unsigned boundary[MAX_DIES]; + loff_t diesize[MAX_DIES]; unsigned int chipsize; unsigned int device_id; unsigned int version_id; + unsigned int technology; unsigned int density_mask; unsigned int options; @@ -145,6 +155,8 @@ struct onenand_chip { #define ONENAND_SET_BUFFERRAM0(this) (this->bufferram_index = 0) #define ONENAND_SET_BUFFERRAM1(this) (this->bufferram_index = 1) +#define FLEXONENAND(this) \ + (this->device_id & DEVICE_IS_FLEXONENAND) #define ONENAND_GET_SYS_CFG1(this) \ (this->read_word(this->base + ONENAND_REG_SYS_CFG1)) #define ONENAND_SET_SYS_CFG1(v, this) \ @@ -153,6 +165,9 @@ struct onenand_chip { #define ONENAND_IS_DDP(this) \ (this->device_id & ONENAND_DEVICE_IS_DDP) +#define ONENAND_IS_MLC(this) \ + (this->technology & ONENAND_TECHNOLOGY_IS_MLC) + #ifdef CONFIG_MTD_ONENAND_2X_PROGRAM #define ONENAND_IS_2PLANE(this) \ (this->options & ONENAND_HAS_2PLANE) @@ -190,5 +205,8 @@ struct onenand_manufacturers { int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops); +unsigned onenand_block(struct onenand_chip *this, loff_t addr); +loff_t onenand_addr(struct onenand_chip *this, int block); +int flexonenand_region(struct mtd_info *mtd, loff_t addr); #endif /* __LINUX_MTD_ONENAND_H */ diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h index 0c6bbe28f38..86a6bbef646 100644 --- a/include/linux/mtd/onenand_regs.h +++ b/include/linux/mtd/onenand_regs.h @@ -67,6 +67,9 @@ /* * Device ID Register F001h (R) */ +#define DEVICE_IS_FLEXONENAND (1 << 9) +#define FLEXONENAND_PI_MASK (0x3ff) +#define FLEXONENAND_PI_UNLOCK_SHIFT (14) #define ONENAND_DEVICE_DENSITY_MASK (0xf) #define ONENAND_DEVICE_DENSITY_SHIFT (4) #define ONENAND_DEVICE_IS_DDP (1 << 3) @@ -83,6 +86,11 @@ */ #define ONENAND_VERSION_PROCESS_SHIFT (8) +/* + * Technology Register F006h (R) + */ +#define ONENAND_TECHNOLOGY_IS_MLC (1 << 0) + /* * Start Address 1 F100h (R/W) & Start Address 2 F101h (R/W) */ @@ -93,7 +101,8 @@ /* * Start Address 8 F107h (R/W) */ -#define ONENAND_FPA_MASK (0x3f) +/* Note: It's actually 0x3f in case of SLC */ +#define ONENAND_FPA_MASK (0x7f) #define ONENAND_FPA_SHIFT (2) #define ONENAND_FSA_MASK (0x03) @@ -105,7 +114,8 @@ #define ONENAND_BSA_BOOTRAM (0 << 2) #define ONENAND_BSA_DATARAM0 (2 << 2) #define ONENAND_BSA_DATARAM1 (3 << 2) -#define ONENAND_BSC_MASK (0x03) +/* Note: It's actually 0x03 in case of SLC */ +#define ONENAND_BSC_MASK (0x07) /* * Command Register F220h (R/W) @@ -124,9 +134,13 @@ #define ONENAND_CMD_RESET (0xF0) #define ONENAND_CMD_OTP_ACCESS (0x65) #define ONENAND_CMD_READID (0x90) +#define FLEXONENAND_CMD_PI_UPDATE (0x05) +#define FLEXONENAND_CMD_PI_ACCESS (0x66) +#define FLEXONENAND_CMD_RECOVER_LSB (0x05) /* NOTE: Those are not *REAL* commands */ #define ONENAND_CMD_BUFFERRAM (0x1978) +#define FLEXONENAND_CMD_READ_PI (0x1985) /* * System Configuration 1 Register F221h (R, R/W) @@ -192,10 +206,12 @@ #define ONENAND_ECC_1BIT_ALL (0x5555) #define ONENAND_ECC_2BIT (1 << 1) #define ONENAND_ECC_2BIT_ALL (0xAAAA) +#define FLEXONENAND_UNCORRECTABLE_ERROR (0x1010) /* * One-Time Programmable (OTP) */ +#define FLEXONENAND_OTP_LOCK_OFFSET (2048) #define ONENAND_OTP_LOCK_OFFSET (14) #endif /* __ONENAND_REG_H */ -- cgit v1.2.3-70-g09d2 From 31bb999ee73748068ddc271dd99b22dcc418efe3 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Tue, 12 May 2009 13:46:57 -0700 Subject: mtd: onenand: add bbt_wait & unlock_all as replaceable for some platform Add bbt_wait & unlock_all as replaceable for some platform such as s3c64xx s3c64xx has its own OneNAND controller and another interface Signed-off-by: Kyungmin Park Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/onenand/onenand_base.c | 12 ++++++++++-- include/linux/mtd/onenand.h | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 8d4c9c25373..864327ed7fb 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1506,7 +1506,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from, onenand_update_bufferram(mtd, from, 0); - ret = onenand_bbt_wait(mtd, FL_READING); + ret = this->bbt_wait(mtd, FL_READING); if (unlikely(ret)) ret = onenand_recover_lsb(mtd, from, ret); @@ -2527,6 +2527,10 @@ static void onenand_unlock_all(struct mtd_info *mtd) & ONENAND_CTRL_ONGO) continue; + /* Don't check lock status */ + if (this->options & ONENAND_SKIP_UNLOCK_CHECK) + return; + /* Check lock status */ if (onenand_check_lock_status(this)) return; @@ -3442,6 +3446,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) this->command = onenand_command; if (!this->wait) onenand_setup_wait(mtd); + if (!this->bbt_wait) + this->bbt_wait = onenand_bbt_wait; + if (!this->unlock_all) + this->unlock_all = onenand_unlock_all; if (!this->read_bufferram) this->read_bufferram = onenand_read_bufferram; @@ -3559,7 +3567,7 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) mtd->owner = THIS_MODULE; /* Unlock whole block */ - onenand_unlock_all(mtd); + this->unlock_all(mtd); ret = this->scan_bbt(mtd); if ((!FLEXONENAND(this)) || ret) diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 9aab82c1c74..8ed87337438 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -74,6 +74,8 @@ struct onenand_bufferram { * @command: [REPLACEABLE] hardware specific function for writing * commands to the chip * @wait: [REPLACEABLE] hardware specific function for wait on ready + * @bbt_wait: [REPLACEABLE] hardware specific function for bbt wait on ready + * @unlock_all: [REPLACEABLE] hardware specific function for unlock all * @read_bufferram: [REPLACEABLE] hardware specific function for BufferRAM Area * @write_bufferram: [REPLACEABLE] hardware specific function for BufferRAM Area * @read_word: [REPLACEABLE] hardware specific function for read @@ -118,6 +120,8 @@ struct onenand_chip { int (*command)(struct mtd_info *mtd, int cmd, loff_t address, size_t len); int (*wait)(struct mtd_info *mtd, int state); + int (*bbt_wait)(struct mtd_info *mtd, int state); + void (*unlock_all)(struct mtd_info *mtd); int (*read_bufferram)(struct mtd_info *mtd, int area, unsigned char *buffer, int offset, size_t count); int (*write_bufferram)(struct mtd_info *mtd, int area, @@ -184,6 +188,7 @@ struct onenand_chip { #define ONENAND_HAS_CONT_LOCK (0x0001) #define ONENAND_HAS_UNLOCK_ALL (0x0002) #define ONENAND_HAS_2PLANE (0x0004) +#define ONENAND_SKIP_UNLOCK_CHECK (0x0100) #define ONENAND_PAGEBUF_ALLOC (0x1000) #define ONENAND_OOBBUF_ALLOC (0x2000) -- cgit v1.2.3-70-g09d2 From a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 09:58:57 +0200 Subject: perf_counter: Separate out attr->type from attr->config Counter type is a frequently used value and we do a lot of bit juggling by encoding and decoding it from attr->config. Clean this up by creating a separate attr->type field. Also clean up the various similarly complex user-space bits all around counter attribute management. The net improvement is significant, and it will be easier to add a new major type (which is what triggered this cleanup). (This changes the ABI, all tools are adapted.) (PowerPC build-tested.) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- Documentation/perf_counter/builtin-record.c | 105 ++++++++++------------ Documentation/perf_counter/builtin-stat.c | 76 +++++++--------- Documentation/perf_counter/builtin-top.c | 67 +++++--------- Documentation/perf_counter/perf.h | 2 - Documentation/perf_counter/util/parse-events.c | 120 ++++++++++++++----------- Documentation/perf_counter/util/parse-events.h | 7 +- arch/powerpc/kernel/perf_counter.c | 6 +- arch/x86/kernel/cpu/perf_counter.c | 8 +- include/linux/perf_counter.h | 65 +++----------- kernel/perf_counter.c | 14 ++- 10 files changed, 196 insertions(+), 274 deletions(-) (limited to 'include') diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c index c22ea0c7472..130fd88266b 100644 --- a/Documentation/perf_counter/builtin-record.c +++ b/Documentation/perf_counter/builtin-record.c @@ -20,10 +20,10 @@ #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) #define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) -static long default_interval = 100000; -static long event_count[MAX_COUNTERS]; - static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static long default_interval = 100000; + static int nr_cpus = 0; static unsigned int page_size; static unsigned int mmap_pages = 128; @@ -38,22 +38,44 @@ static int inherit = 1; static int force = 0; static int append_file = 0; -const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, +static long samples; +static struct timeval last_read; +static struct timeval this_read; + +static __u64 bytes_written; + +static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; + +static int nr_poll; +static int nr_cpu; + +struct mmap_event { + struct perf_event_header header; + __u32 pid; + __u32 tid; + __u64 start; + __u64 len; + __u64 pgoff; + char filename[PATH_MAX]; +}; + +struct comm_event { + struct perf_event_header header; + __u32 pid; + __u32 tid; + char comm[16]; }; + struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; + int counter; + void *base; + unsigned int mask; + unsigned int prev; }; +static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; + static unsigned int mmap_read_head(struct mmap_data *md) { struct perf_counter_mmap_page *pc = md->base; @@ -65,11 +87,6 @@ static unsigned int mmap_read_head(struct mmap_data *md) return head; } -static long samples; -static struct timeval last_read, this_read; - -static __u64 bytes_written; - static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -157,29 +174,6 @@ static void sig_handler(int sig) done = 1; } -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - -static int nr_poll; -static int nr_cpu; - -struct mmap_event { - struct perf_event_header header; - __u32 pid; - __u32 tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - __u32 pid; - __u32 tid; - char comm[16]; -}; - static void pid_synthesize_comm_event(pid_t pid, int full) { struct comm_event comm_ev; @@ -341,24 +335,21 @@ static int group_fd; static void create_counter(int counter, int cpu, pid_t pid) { - struct perf_counter_attr attr; + struct perf_counter_attr *attr = attrs + counter; int track = 1; - memset(&attr, 0, sizeof(attr)); - attr.config = event_id[counter]; - attr.sample_period = event_count[counter]; - attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD; + attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD; if (freq) { - attr.freq = 1; - attr.sample_freq = freq; + attr->freq = 1; + attr->sample_freq = freq; } - attr.mmap = track; - attr.comm = track; - attr.inherit = (cpu < 0) && inherit; + attr->mmap = track; + attr->comm = track; + attr->inherit = (cpu < 0) && inherit; track = 0; /* only the first counter needs these */ - fd[nr_cpu][counter] = sys_perf_counter_open(&attr, pid, cpu, group_fd, 0); + fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); if (fd[nr_cpu][counter] < 0) { int err = errno; @@ -542,16 +533,14 @@ int cmd_record(int argc, const char **argv, const char *prefix) if (!argc && target_pid == -1 && !system_wide) usage_with_options(record_usage, options); - if (!nr_counters) { + if (!nr_counters) nr_counters = 1; - event_id[0] = 0; - } for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) + if (attrs[counter].sample_period) continue; - event_count[counter] = default_interval; + attrs[counter].sample_period = default_interval; } return __cmd_record(argc, argv); diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c index 4fc0d80440e..9711e552423 100644 --- a/Documentation/perf_counter/builtin-stat.c +++ b/Documentation/perf_counter/builtin-stat.c @@ -44,23 +44,22 @@ #include -static int system_wide = 0; -static int inherit = 1; +static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { -static __u64 default_event_id[MAX_COUNTERS] = { - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS }, - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES }, }; -static int default_interval = 100000; -static int event_count[MAX_COUNTERS]; +static int system_wide = 0; +static int inherit = 1; + static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static int target_pid = -1; @@ -86,22 +85,16 @@ static __u64 walltime_nsecs; static void create_perfstat_counter(int counter) { - struct perf_counter_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.config = event_id[counter]; - attr.sample_type = 0; - attr.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL; - attr.exclude_user = event_mask[counter] & EVENT_MASK_USER; + struct perf_counter_attr *attr = attrs + counter; if (scale) - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { int cpu; for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(&attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0) { printf("perfstat error: syscall returned with %d (%s)\n", fd[cpu][counter], strerror(errno)); @@ -109,10 +102,10 @@ static void create_perfstat_counter(int counter) } } } else { - attr.inherit = inherit; - attr.disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; - fd[0][counter] = sys_perf_counter_open(&attr, 0, -1, -1, 0); + fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); if (fd[0][counter] < 0) { printf("perfstat error: syscall returned with %d (%s)\n", fd[0][counter], strerror(errno)); @@ -126,9 +119,13 @@ static void create_perfstat_counter(int counter) */ static inline int nsec_counter(int counter) { - if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK)) + if (attrs[counter].type != PERF_TYPE_SOFTWARE) + return 0; + + if (attrs[counter].config == PERF_COUNT_CPU_CLOCK) return 1; - if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) + + if (attrs[counter].config == PERF_COUNT_TASK_CLOCK) return 1; return 0; @@ -177,7 +174,8 @@ static void read_counter(int counter) /* * Save the full runtime - to allow normalization during printout: */ - if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_TASK_CLOCK) runtime_nsecs = count[0]; } @@ -203,8 +201,8 @@ static void print_counter(int counter) fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); - if (event_id[counter] == - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_TASK_CLOCK) { fprintf(stderr, " # %11.3f CPU utilization factor", (double)count[0] / (double)walltime_nsecs); @@ -300,8 +298,6 @@ static char events_help_msg[EVENTS_HELP_MAX]; static const struct option options[] = { OPT_CALLBACK('e', "event", NULL, "event", events_help_msg, parse_events), - OPT_INTEGER('c', "count", &default_interval, - "event period to sample"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), OPT_INTEGER('p', "pid", &target_pid, @@ -315,27 +311,19 @@ static const struct option options[] = { int cmd_stat(int argc, const char **argv, const char *prefix) { - int counter; - page_size = sysconf(_SC_PAGE_SIZE); create_events_help(events_help_msg); - memcpy(event_id, default_event_id, sizeof(default_event_id)); + + memcpy(attrs, default_attrs, sizeof(attrs)); argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); - if (!nr_counters) { + if (!nr_counters) nr_counters = 8; - } - - for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) - continue; - event_count[counter] = default_interval; - } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index b2f480b5a13..98a6d53e17b 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -48,22 +48,11 @@ #include #include -static int system_wide = 0; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static __u64 default_event_id[MAX_COUNTERS] = { - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), +static int system_wide = 0; - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), -}; -static int default_interval = 100000; -static int event_count[MAX_COUNTERS]; -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int default_interval = 100000; static __u64 count_filter = 5; static int print_entries = 15; @@ -85,15 +74,6 @@ static int delay_secs = 2; static int zero; static int dump_symtab; -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - /* * Symbols */ @@ -112,7 +92,7 @@ struct sym_entry { struct sym_entry *sym_filter_entry; -struct dso *kernel_dso; +struct dso *kernel_dso; /* * Symbols will be added here in record_ip and will get out @@ -213,7 +193,7 @@ static void print_sym_table(void) 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); if (nr_counters == 1) { - printf("%d", event_count[0]); + printf("%Ld", attrs[0].sample_period); if (freq) printf("Hz "); else @@ -421,10 +401,10 @@ static void process_event(uint64_t ip, int counter) } struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; + int counter; + void *base; + unsigned int mask; + unsigned int prev; }; static unsigned int mmap_read_head(struct mmap_data *md) @@ -539,7 +519,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static int __cmd_top(void) { - struct perf_counter_attr attr; + struct perf_counter_attr *attr; pthread_t thread; int i, counter, group_fd, nr_poll = 0; unsigned int cpu; @@ -553,13 +533,12 @@ static int __cmd_top(void) if (target_pid == -1 && profile_cpu == -1) cpu = i; - memset(&attr, 0, sizeof(attr)); - attr.config = event_id[counter]; - attr.sample_period = event_count[counter]; - attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - attr.freq = freq; + attr = attrs + counter; - fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0); + attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + attr->freq = freq; + + fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); if (fd[i][counter] < 0) { int err = errno; @@ -670,7 +649,6 @@ int cmd_top(int argc, const char **argv, const char *prefix) page_size = sysconf(_SC_PAGE_SIZE); create_events_help(events_help_msg); - memcpy(event_id, default_event_id, sizeof(default_event_id)); argc = parse_options(argc, argv, options, top_usage, 0); if (argc) @@ -688,19 +666,22 @@ int cmd_top(int argc, const char **argv, const char *prefix) profile_cpu = -1; } - if (!nr_counters) { + if (!nr_counters) nr_counters = 1; - event_id[0] = 0; - } if (delay_secs < 1) delay_secs = 1; + parse_symbols(); + + /* + * Fill in the ones not specifically initialized via -c: + */ for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) + if (attrs[counter].sample_period) continue; - event_count[counter] = default_interval; + attrs[counter].sample_period = default_interval; } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); @@ -710,7 +691,5 @@ int cmd_top(int argc, const char **argv, const char *prefix) if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; - parse_symbols(); - return __cmd_top(); } diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h index 10622a48b40..af0a5046d74 100644 --- a/Documentation/perf_counter/perf.h +++ b/Documentation/perf_counter/perf.h @@ -64,6 +64,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr_uptr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 -#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) - #endif diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c index 2fdfd1d923f..eb56bd99657 100644 --- a/Documentation/perf_counter/util/parse-events.c +++ b/Documentation/perf_counter/util/parse-events.c @@ -6,37 +6,39 @@ #include "exec_cmd.h" #include "string.h" -int nr_counters; +int nr_counters; -__u64 event_id[MAX_COUNTERS] = { }; -int event_mask[MAX_COUNTERS]; +struct perf_counter_attr attrs[MAX_COUNTERS]; struct event_symbol { - __u64 event; - char *symbol; + __u8 type; + __u64 config; + char *symbol; }; +#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y + static struct event_symbol event_symbols[] = { - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, - - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, + { C(HARDWARE, CPU_CYCLES), "cpu-cycles", }, + { C(HARDWARE, CPU_CYCLES), "cycles", }, + { C(HARDWARE, INSTRUCTIONS), "instructions", }, + { C(HARDWARE, CACHE_REFERENCES), "cache-references", }, + { C(HARDWARE, CACHE_MISSES), "cache-misses", }, + { C(HARDWARE, BRANCH_INSTRUCTIONS), "branch-instructions", }, + { C(HARDWARE, BRANCH_INSTRUCTIONS), "branches", }, + { C(HARDWARE, BRANCH_MISSES), "branch-misses", }, + { C(HARDWARE, BUS_CYCLES), "bus-cycles", }, + + { C(SOFTWARE, CPU_CLOCK), "cpu-clock", }, + { C(SOFTWARE, TASK_CLOCK), "task-clock", }, + { C(SOFTWARE, PAGE_FAULTS), "page-faults", }, + { C(SOFTWARE, PAGE_FAULTS), "faults", }, + { C(SOFTWARE, PAGE_FAULTS_MIN), "minor-faults", }, + { C(SOFTWARE, PAGE_FAULTS_MAJ), "major-faults", }, + { C(SOFTWARE, CONTEXT_SWITCHES), "context-switches", }, + { C(SOFTWARE, CONTEXT_SWITCHES), "cs", }, + { C(SOFTWARE, CPU_MIGRATIONS), "cpu-migrations", }, + { C(SOFTWARE, CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -67,27 +69,26 @@ static char *sw_event_names[] = { "major faults", }; -char *event_name(int ctr) +char *event_name(int counter) { - __u64 config = event_id[ctr]; - int type = PERF_COUNTER_TYPE(config); - int id = PERF_COUNTER_ID(config); + __u64 config = attrs[counter].config; + int type = attrs[counter].type; static char buf[32]; - if (PERF_COUNTER_RAW(config)) { - sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); + if (attrs[counter].type == PERF_TYPE_RAW) { + sprintf(buf, "raw 0x%llx", config); return buf; } switch (type) { case PERF_TYPE_HARDWARE: - if (id < PERF_HW_EVENTS_MAX) - return hw_event_names[id]; + if (config < PERF_HW_EVENTS_MAX) + return hw_event_names[config]; return "unknown-hardware"; case PERF_TYPE_SOFTWARE: - if (id < PERF_SW_EVENTS_MAX) - return sw_event_names[id]; + if (config < PERF_SW_EVENTS_MAX) + return sw_event_names[config]; return "unknown-software"; default: @@ -101,15 +102,19 @@ char *event_name(int ctr) * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. */ -static __u64 match_event_symbols(const char *str) +static int match_event_symbols(const char *str, struct perf_counter_attr *attr) { __u64 config, id; int type; unsigned int i; const char *sep, *pstr; - if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) - return config | PERF_COUNTER_RAW_MASK; + if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { + attr->type = PERF_TYPE_RAW; + attr->config = config; + + return 0; + } pstr = str; sep = strchr(pstr, ':'); @@ -121,35 +126,45 @@ static __u64 match_event_symbols(const char *str) if (sep) { pstr = sep + 1; if (strchr(pstr, 'k')) - event_mask[nr_counters] |= EVENT_MASK_USER; + attr->exclude_user = 1; if (strchr(pstr, 'u')) - event_mask[nr_counters] |= EVENT_MASK_KERNEL; + attr->exclude_kernel = 1; } - return EID(type, id); + attr->type = type; + attr->config = id; + + return 0; } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; + strlen(event_symbols[i].symbol))) { + + attr->type = event_symbols[i].type; + attr->config = event_symbols[i].config; + + return 0; + } } - return ~0ULL; + return -EINVAL; } int parse_events(const struct option *opt, const char *str, int unset) { - __u64 config; + struct perf_counter_attr attr; + int ret; + memset(&attr, 0, sizeof(attr)); again: if (nr_counters == MAX_COUNTERS) return -1; - config = match_event_symbols(str); - if (config == ~0ULL) - return -1; + ret = match_event_symbols(str, &attr); + if (ret < 0) + return ret; - event_id[nr_counters] = config; + attrs[nr_counters] = attr; nr_counters++; str = strstr(str, ","); @@ -168,7 +183,6 @@ void create_events_help(char *events_help_msg) { unsigned int i; char *str; - __u64 e; str = events_help_msg; @@ -178,9 +192,8 @@ void create_events_help(char *events_help_msg) for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { int type, id; - e = event_symbols[i].event; - type = PERF_COUNTER_TYPE(e); - id = PERF_COUNTER_ID(e); + type = event_symbols[i].type; + id = event_symbols[i].config; if (i) str += sprintf(str, "|"); @@ -191,4 +204,3 @@ void create_events_help(char *events_help_msg) str += sprintf(str, "|rNNN]"); } - diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h index 0da306bb902..542971c495b 100644 --- a/Documentation/perf_counter/util/parse-events.h +++ b/Documentation/perf_counter/util/parse-events.h @@ -3,12 +3,9 @@ * Parse symbolic events/counts passed in as options: */ -extern int nr_counters; -extern __u64 event_id[MAX_COUNTERS]; -extern int event_mask[MAX_COUNTERS]; +extern int nr_counters; -#define EVENT_MASK_KERNEL 1 -#define EVENT_MASK_USER 2 +extern struct perf_counter_attr attrs[MAX_COUNTERS]; extern char *event_name(int ctr); diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 232b00a36f7..4786ad9a288 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -867,13 +867,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if (!perf_event_raw(&counter->attr)) { - ev = perf_event_id(&counter->attr); + if (counter->attr.type != PERF_TYPE_RAW) { + ev = counter->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { - ev = perf_event_config(&counter->attr); + ev = counter->attr.config; } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 8f53f3a7da2..430e048f285 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (perf_event_raw(attr)) { - hwc->config |= x86_pmu.raw_event(perf_event_config(attr)); + if (attr->type == PERF_TYPE_RAW) { + hwc->config |= x86_pmu.raw_event(attr->config); } else { - if (perf_event_id(attr) >= x86_pmu.max_events) + if (attr->config >= x86_pmu.max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= x86_pmu.event_map(perf_event_id(attr)); + hwc->config |= x86_pmu.event_map(attr->config); } counter->destroy = hw_perf_counter_destroy; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4f9d39ecdc0..f794c69b34c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,26 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -#define __PERF_COUNTER_MASK(name) \ - (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ - PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW_BITS 1 -#define PERF_COUNTER_RAW_SHIFT 63 -#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) - -#define PERF_COUNTER_CONFIG_BITS 63 -#define PERF_COUNTER_CONFIG_SHIFT 0 -#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) - -#define PERF_COUNTER_TYPE_BITS 7 -#define PERF_COUNTER_TYPE_SHIFT 56 -#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) - -#define PERF_COUNTER_EVENT_BITS 56 -#define PERF_COUNTER_EVENT_SHIFT 0 -#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) - /* * Bits that can be set in attr.sample_type to request information * in the overflow packets. @@ -125,10 +105,13 @@ enum perf_counter_read_format { */ struct perf_counter_attr { /* - * The MSB of the config word signifies if the rest contains cpu - * specific (raw) counter configuration data, if unset, the next - * 7 bits are an event type and the rest of the bits are the event - * identifier. + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + __u32 __reserved_1; + + /* + * Type specific configuration information. */ __u64 config; @@ -152,12 +135,11 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_1 : 53; + __reserved_2 : 53; __u32 wakeup_events; /* wakeup every n events */ - __u32 __reserved_2; + __u32 __reserved_3; - __u64 __reserved_3; __u64 __reserved_4; }; @@ -278,8 +260,8 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, ppid; + * struct perf_event_header header; + * u32 pid, ppid; * }; */ PERF_EVENT_FORK = 7, @@ -331,27 +313,6 @@ enum perf_event_type { struct task_struct; -static inline u64 perf_event_raw(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_RAW_MASK; -} - -static inline u64 perf_event_config(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_CONFIG_MASK; -} - -static inline u64 perf_event_type(struct perf_counter_attr *attr) -{ - return (attr->config & PERF_COUNTER_TYPE_MASK) >> - PERF_COUNTER_TYPE_SHIFT; -} - -static inline u64 perf_event_id(struct perf_counter_attr *attr) -{ - return attr->config & PERF_COUNTER_EVENT_MASK; -} - /** * struct hw_perf_counter - performance counter hardware details: */ @@ -616,8 +577,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !perf_event_raw(&counter->attr) && - perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE; + return (counter->attr.type != PERF_TYPE_RAW) && + (counter->attr.type != PERF_TYPE_HARDWARE); } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 47c92fb927f..75ae76796df 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3091,14 +3091,12 @@ static int perf_swcounter_match(struct perf_counter *counter, enum perf_event_types type, u32 event, struct pt_regs *regs) { - u64 event_config; - - event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event; - if (!perf_swcounter_is_counting(counter)) return 0; - if (counter->attr.config != event_config) + if (counter->attr.type != type) + return 0; + if (counter->attr.config != event) return 0; if (regs) { @@ -3403,7 +3401,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (perf_event_id(&counter->attr)) { + switch (counter->attr.config) { case PERF_COUNT_CPU_CLOCK: pmu = &perf_ops_cpu_clock; @@ -3496,12 +3494,12 @@ perf_counter_alloc(struct perf_counter_attr *attr, if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) goto done; - if (perf_event_raw(attr)) { + if (attr->type == PERF_TYPE_RAW) { pmu = hw_perf_counter_init(counter); goto done; } - switch (perf_event_type(attr)) { + switch (attr->type) { case PERF_TYPE_HARDWARE: pmu = hw_perf_counter_init(counter); break; -- cgit v1.2.3-70-g09d2 From 8326f44da090d6d304d29b9fdc7fb3e20889e329 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Jun 2009 20:22:46 +0200 Subject: perf_counter: Implement generalized cache event types Extend generic event enumeration with the PERF_TYPE_HW_CACHE method. This is a 3-dimensional space: { L1-D, L1-I, L2, ITLB, DTLB, BPU } x { load, store, prefetch } x { accesses, misses } User-space passes in the 3 coordinates and the kernel provides a counter. (if the hardware supports that type and if the combination makes sense.) Combinations that make no sense produce a -EINVAL. Combinations that are not supported by the hardware produce -ENOTSUP. Extend the tools to deal with this, and rewrite the event symbol parsing code with various popular aliases for the units and access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are both valid aliases. ( x86 is supported for now, with the Nehalem event table filled in, and with Core2 and Atom having placeholder tables. ) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- Documentation/perf_counter/util/parse-events.c | 104 ++++++++++++- arch/x86/kernel/cpu/perf_counter.c | 201 ++++++++++++++++++++++++- include/linux/perf_counter.h | 34 +++++ kernel/perf_counter.c | 1 + 4 files changed, 329 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c index eb56bd99657..de9a77c4715 100644 --- a/Documentation/perf_counter/util/parse-events.c +++ b/Documentation/perf_counter/util/parse-events.c @@ -6,6 +6,8 @@ #include "exec_cmd.h" #include "string.h" +extern char *strcasestr(const char *haystack, const char *needle); + int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; @@ -17,6 +19,7 @@ struct event_symbol { }; #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y +#define CR(x, y) .type = PERF_TYPE_##x, .config = y static struct event_symbol event_symbols[] = { { C(HARDWARE, CPU_CYCLES), "cpu-cycles", }, @@ -69,6 +72,28 @@ static char *sw_event_names[] = { "major faults", }; +#define MAX_ALIASES 8 + +static char *hw_cache [][MAX_ALIASES] = { + { "l1-d" , "l1d" , "l1", "l1-data-cache" }, + { "l1-i" , "l1i" , "l1-instruction-cache" }, + { "l2" , }, + { "dtlb", }, + { "itlb", }, + { "bpu" , "btb", "branch-cache", NULL }, +}; + +static char *hw_cache_op [][MAX_ALIASES] = { + { "read" , "load" }, + { "write" , "store" }, + { "prefetch" , "speculative-read", "speculative-load" }, +}; + +static char *hw_cache_result [][MAX_ALIASES] = { + { "access", "ops" }, + { "miss", }, +}; + char *event_name(int counter) { __u64 config = attrs[counter].config; @@ -86,6 +111,30 @@ char *event_name(int counter) return hw_event_names[config]; return "unknown-hardware"; + case PERF_TYPE_HW_CACHE: { + __u8 cache_type, cache_op, cache_result; + static char name[100]; + + cache_type = (config >> 0) & 0xff; + if (cache_type > PERF_COUNT_HW_CACHE_MAX) + return "unknown-ext-hardware-cache-type"; + + cache_op = (config >> 8) & 0xff; + if (cache_type > PERF_COUNT_HW_CACHE_OP_MAX) + return "unknown-ext-hardware-cache-op-type"; + + cache_result = (config >> 16) & 0xff; + if (cache_type > PERF_COUNT_HW_CACHE_RESULT_MAX) + return "unknown-ext-hardware-cache-result-type"; + + sprintf(name, "%s:%s:%s", + hw_cache[cache_type][0], + hw_cache_op[cache_op][0], + hw_cache_result[cache_result][0]); + + return name; + } + case PERF_TYPE_SOFTWARE: if (config < PERF_SW_EVENTS_MAX) return sw_event_names[config]; @@ -98,11 +147,60 @@ char *event_name(int counter) return "unknown"; } +static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) +{ + int i, j; + + for (i = 0; i < size; i++) { + for (j = 0; j < MAX_ALIASES; j++) { + if (!names[i][j]) + break; + if (strcasestr(str, names[i][j])) + return i; + } + } + + return 0; +} + +static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +{ + __u8 cache_type = -1, cache_op = 0, cache_result = 0; + + cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); + /* + * No fallback - if we cannot get a clear cache type + * then bail out: + */ + if (cache_type == -1) + return -EINVAL; + + cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); + /* + * Fall back to reads: + */ + if (cache_type == -1) + cache_type = PERF_COUNT_HW_CACHE_OP_READ; + + cache_result = parse_aliases(str, hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); + /* + * Fall back to accesses: + */ + if (cache_result == -1) + cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; + + attr->config = cache_type | (cache_op << 8) | (cache_result << 16); + attr->type = PERF_TYPE_HW_CACHE; + + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. */ -static int match_event_symbols(const char *str, struct perf_counter_attr *attr) +static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) { __u64 config, id; int type; @@ -147,7 +245,7 @@ static int match_event_symbols(const char *str, struct perf_counter_attr *attr) } } - return -EINVAL; + return parse_generic_hw_symbols(str, attr); } int parse_events(const struct option *opt, const char *str, int unset) @@ -160,7 +258,7 @@ again: if (nr_counters == MAX_COUNTERS) return -1; - ret = match_event_symbols(str, &attr); + ret = parse_event_symbols(str, &attr); if (ret < 0) return ret; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 430e048f285..e86679fa521 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -83,6 +83,128 @@ static u64 intel_pmu_event_map(int event) return intel_perfmon_event_map[event]; } +/* + * Generalized hw caching related event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'event makes no sense on + * this CPU', any other value means the raw event + * ID. + */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +static const u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(L2 ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES */ + [ C(RESULT_MISS) ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + /* To be filled in */ +}; + +static const u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + /* To be filled in */ +}; + static u64 intel_pmu_raw_event(u64 event) { #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL @@ -246,6 +368,39 @@ static inline int x86_pmu_initialized(void) return x86_pmu.handle_irq != NULL; } +static inline int +set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) +{ + unsigned int cache_type, cache_op, cache_result; + u64 config, val; + + config = attr->config; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + val = hw_cache_event_ids[cache_type][cache_op][cache_result]; + + if (val == 0) + return -ENOENT; + + if (val == -1) + return -EINVAL; + + hwc->config |= val; + + return 0; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -288,22 +443,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->sample_period = x86_pmu.max_period; atomic64_set(&hwc->period_left, hwc->sample_period); + counter->destroy = hw_perf_counter_destroy; /* * Raw event type provide the config in the event structure */ if (attr->type == PERF_TYPE_RAW) { hwc->config |= x86_pmu.raw_event(attr->config); - } else { - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - /* - * The generic map: - */ - hwc->config |= x86_pmu.event_map(attr->config); + return 0; } - counter->destroy = hw_perf_counter_destroy; + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, attr); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + /* + * The generic map: + */ + hwc->config |= x86_pmu.event_map(attr->config); return 0; } @@ -989,6 +1147,33 @@ static int intel_pmu_init(void) rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + /* + * Nehalem: + */ + switch (boot_cpu_data.x86_model) { + case 17: + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* + PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); + + pr_info("... installed Core2 event tables\n"); + break; + default: + case 26: + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* + PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); + + pr_info("... installed Nehalem/Corei7 event tables\n"); + break; + case 28: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* + PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); + + pr_info("... installed Atom event tables\n"); + break; + } return 0; } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f794c69b34c..3586df840f6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -28,6 +28,7 @@ enum perf_event_types { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, /* * available TYPE space, raw is the max value. @@ -55,6 +56,39 @@ enum attr_ids { PERF_HW_EVENTS_MAX = 7, }; +/* + * Generalized hardware cache counters: + * + * { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum hw_cache_id { + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_L1I, + PERF_COUNT_HW_CACHE_L2, + PERF_COUNT_HW_CACHE_DTLB, + PERF_COUNT_HW_CACHE_ITLB, + PERF_COUNT_HW_CACHE_BPU, + + PERF_COUNT_HW_CACHE_MAX, +}; + +enum hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ, + PERF_COUNT_HW_CACHE_OP_WRITE, + PERF_COUNT_HW_CACHE_OP_PREFETCH, + + PERF_COUNT_HW_CACHE_OP_MAX, +}; + +enum hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS, + PERF_COUNT_HW_CACHE_RESULT_MISS, + + PERF_COUNT_HW_CACHE_RESULT_MAX, +}; + /* * Special "software" counters provided by the kernel, even if the hardware * does not support performance counters. These counters measure various diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 75ae76796df..5eacaaf3f9c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3501,6 +3501,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, switch (attr->type) { case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: pmu = hw_perf_counter_init(counter); break; -- cgit v1.2.3-70-g09d2 From e5110d011e03030926872457f05e49e3d5031737 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 6 Jun 2009 18:35:27 +0200 Subject: firewire: add parent-of-unit accessor Retrieval of an fw_unit's parent is a common pattern in high-level code. Wrap it up as device = fw_parent_device(unit). Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 4 ++-- drivers/firewire/sbp2.c | 37 +++++++++++++++++++++---------------- include/linux/firewire.h | 5 +++++ 3 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 65d84dd6c1d..3f4e646367b 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -93,7 +93,7 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv) if (!is_fw_unit(dev)) return 0; - device = fw_device(unit->device.parent); + device = fw_parent_device(unit); id = container_of(drv, struct fw_driver, driver)->id_table; for (; id->match_flags != 0; id++) { @@ -114,7 +114,7 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv) static int get_modalias(struct fw_unit *unit, char *buffer, size_t buffer_size) { - struct fw_device *device = fw_device(unit->device.parent); + struct fw_device *device = fw_parent_device(unit); struct fw_csr_iterator ci; int key, value; diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index d41cb6e455b..2353643721c 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -180,6 +180,11 @@ struct sbp2_target { int blocked; /* ditto */ }; +static struct fw_device *target_device(struct sbp2_target *tgt) +{ + return fw_parent_device(tgt->unit); +} + /* Impossible login_id, to detect logout attempt before successful login */ #define INVALID_LOGIN_ID 0x10000 @@ -488,7 +493,7 @@ static void complete_transaction(struct fw_card *card, int rcode, static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu, int node_id, int generation, u64 offset) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); unsigned long flags; orb->pointer.high = 0; @@ -510,7 +515,7 @@ static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu, static int sbp2_cancel_orbs(struct sbp2_logical_unit *lu) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); struct sbp2_orb *orb, *next; struct list_head list; unsigned long flags; @@ -548,7 +553,7 @@ static int sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, int generation, int function, int lun_or_login_id, void *response) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); struct sbp2_management_orb *orb; unsigned int timeout; int retval = -ENOMEM; @@ -644,7 +649,7 @@ static int sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, static void sbp2_agent_reset(struct sbp2_logical_unit *lu) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); __be32 d = 0; fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST, @@ -661,7 +666,7 @@ static void complete_agent_reset_write_no_wait(struct fw_card *card, static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); struct fw_transaction *t; static __be32 d; @@ -700,7 +705,7 @@ static inline void sbp2_allow_block(struct sbp2_logical_unit *lu) static void sbp2_conditionally_block(struct sbp2_logical_unit *lu) { struct sbp2_target *tgt = lu->tgt; - struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct fw_card *card = target_device(tgt)->card; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); unsigned long flags; @@ -724,7 +729,7 @@ static void sbp2_conditionally_block(struct sbp2_logical_unit *lu) static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu) { struct sbp2_target *tgt = lu->tgt; - struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct fw_card *card = target_device(tgt)->card; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); unsigned long flags; @@ -749,7 +754,7 @@ static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu) */ static void sbp2_unblock(struct sbp2_target *tgt) { - struct fw_card *card = fw_device(tgt->unit->device.parent)->card; + struct fw_card *card = target_device(tgt)->card; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); unsigned long flags; @@ -779,7 +784,7 @@ static void sbp2_release_target(struct kref *kref) struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); struct scsi_device *sdev; - struct fw_device *device = fw_device(tgt->unit->device.parent); + struct fw_device *device = target_device(tgt); /* prevent deadlocks */ sbp2_unblock(tgt); @@ -852,7 +857,7 @@ static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay) */ static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu) { - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); __be32 d = cpu_to_be32(SBP2_CYCLE_LIMIT | SBP2_RETRY_LIMIT); fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST, @@ -868,7 +873,7 @@ static void sbp2_login(struct work_struct *work) struct sbp2_logical_unit *lu = container_of(work, struct sbp2_logical_unit, work.work); struct sbp2_target *tgt = lu->tgt; - struct fw_device *device = fw_device(tgt->unit->device.parent); + struct fw_device *device = target_device(tgt); struct Scsi_Host *shost; struct scsi_device *sdev; struct sbp2_login_response response; @@ -1116,7 +1121,7 @@ static struct scsi_host_template scsi_driver_template; static int sbp2_probe(struct device *dev) { struct fw_unit *unit = fw_unit(dev); - struct fw_device *device = fw_device(unit->device.parent); + struct fw_device *device = fw_parent_device(unit); struct sbp2_target *tgt; struct sbp2_logical_unit *lu; struct Scsi_Host *shost; @@ -1197,7 +1202,7 @@ static void sbp2_reconnect(struct work_struct *work) struct sbp2_logical_unit *lu = container_of(work, struct sbp2_logical_unit, work.work); struct sbp2_target *tgt = lu->tgt; - struct fw_device *device = fw_device(tgt->unit->device.parent); + struct fw_device *device = target_device(tgt); int generation, node_id, local_node_id; if (fw_device_is_shutdown(device)) @@ -1249,7 +1254,7 @@ static void sbp2_update(struct fw_unit *unit) struct sbp2_target *tgt = unit->device.driver_data; struct sbp2_logical_unit *lu; - fw_device_enable_phys_dma(fw_device(unit->device.parent)); + fw_device_enable_phys_dma(fw_parent_device(unit)); /* * Fw-core serializes sbp2_update() against sbp2_remove(). @@ -1342,7 +1347,7 @@ static void complete_command_orb(struct sbp2_orb *base_orb, { struct sbp2_command_orb *orb = container_of(base_orb, struct sbp2_command_orb, base); - struct fw_device *device = fw_device(orb->lu->tgt->unit->device.parent); + struct fw_device *device = target_device(orb->lu->tgt); int result; if (status != NULL) { @@ -1449,7 +1454,7 @@ static int sbp2_map_scatterlist(struct sbp2_command_orb *orb, static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) { struct sbp2_logical_unit *lu = cmd->device->hostdata; - struct fw_device *device = fw_device(lu->tgt->unit->device.parent); + struct fw_device *device = target_device(lu->tgt); struct sbp2_command_orb *orb; int generation, retval = SCSI_MLQUEUE_HOST_BUSY; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index e979f9b22cb..a69aea0394e 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,6 +248,11 @@ static inline void fw_unit_put(struct fw_unit *unit) put_device(&unit->device); } +static inline struct fw_device *fw_parent_device(struct fw_unit *unit) +{ + return fw_device(unit->device.parent); +} + struct ieee1394_device_id; struct fw_driver { -- cgit v1.2.3-70-g09d2 From a93958ac980f0ce594ad90657ecbc595ff157a40 Mon Sep 17 00:00:00 2001 From: Ayaz Abdulla Date: Sun, 7 Jun 2009 03:54:37 -0700 Subject: removal of forcedeth device ids This patch removes the forcedeth device ids from pci_ids.h The forcedeth driver uses the device id constants directly in its source file. [ Need to keep PCI_DEVICE_ID_NVIDIA_NVENET_15 in order to keep drivers/pci/quirks.c building -DaveM ] Signed-off-by: Ayaz Abdulla Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 12db06cf0e2..b87c51aea14 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1066,8 +1066,6 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS 0x0034 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE 0x0035 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA 0x0036 -#define PCI_DEVICE_ID_NVIDIA_NVENET_10 0x0037 -#define PCI_DEVICE_ID_NVIDIA_NVENET_11 0x0038 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2 0x003e #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_ULTRA 0x0040 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800 0x0041 @@ -1078,21 +1076,16 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE 0x0053 #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA 0x0054 #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2 0x0055 -#define PCI_DEVICE_ID_NVIDIA_NVENET_8 0x0056 -#define PCI_DEVICE_ID_NVIDIA_NVENET_9 0x0057 #define PCI_DEVICE_ID_NVIDIA_CK804_AUDIO 0x0059 #define PCI_DEVICE_ID_NVIDIA_CK804_PCIE 0x005d #define PCI_DEVICE_ID_NVIDIA_NFORCE2_SMBUS 0x0064 #define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE 0x0065 -#define PCI_DEVICE_ID_NVIDIA_NVENET_2 0x0066 #define PCI_DEVICE_ID_NVIDIA_MCP2_MODEM 0x0069 #define PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO 0x006a #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SMBUS 0x0084 #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE 0x0085 -#define PCI_DEVICE_ID_NVIDIA_NVENET_4 0x0086 #define PCI_DEVICE_ID_NVIDIA_MCP2S_MODEM 0x0089 #define PCI_DEVICE_ID_NVIDIA_CK8_AUDIO 0x008a -#define PCI_DEVICE_ID_NVIDIA_NVENET_5 0x008c #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA 0x008e #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GT 0x0090 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GTX 0x0091 @@ -1108,15 +1101,12 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE3 0x00d1 #define PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS 0x00d4 #define PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE 0x00d5 -#define PCI_DEVICE_ID_NVIDIA_NVENET_3 0x00d6 #define PCI_DEVICE_ID_NVIDIA_MCP3_MODEM 0x00d9 #define PCI_DEVICE_ID_NVIDIA_MCP3_AUDIO 0x00da -#define PCI_DEVICE_ID_NVIDIA_NVENET_7 0x00df #define PCI_DEVICE_ID_NVIDIA_NFORCE3S 0x00e1 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA 0x00e3 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS 0x00e4 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE 0x00e5 -#define PCI_DEVICE_ID_NVIDIA_NVENET_6 0x00e6 #define PCI_DEVICE_ID_NVIDIA_CK8S_AUDIO 0x00ea #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2 0x00ee #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1 0x00f0 @@ -1176,7 +1166,6 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_SMBUS 0x01b4 #define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE 0x01bc #define PCI_DEVICE_ID_NVIDIA_MCP1_MODEM 0x01c1 -#define PCI_DEVICE_ID_NVIDIA_NVENET_1 0x01c3 #define PCI_DEVICE_ID_NVIDIA_NFORCE2 0x01e0 #define PCI_DEVICE_ID_NVIDIA_GEFORCE3 0x0200 #define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1 0x0201 @@ -1199,8 +1188,6 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F -#define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 -#define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800 0x0280 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X 0x0281 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE 0x0282 @@ -1247,46 +1234,21 @@ #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2 0x0348 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000 0x034C #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100 0x034E -#define PCI_DEVICE_ID_NVIDIA_NVENET_14 0x0372 #define PCI_DEVICE_ID_NVIDIA_NVENET_15 0x0373 -#define PCI_DEVICE_ID_NVIDIA_NVENET_16 0x03E5 -#define PCI_DEVICE_ID_NVIDIA_NVENET_17 0x03E6 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA 0x03E7 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SMBUS 0x03EB #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE 0x03EC -#define PCI_DEVICE_ID_NVIDIA_NVENET_18 0x03EE -#define PCI_DEVICE_ID_NVIDIA_NVENET_19 0x03EF #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2 0x03F6 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3 0x03F7 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_SMBUS 0x0446 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE 0x0448 -#define PCI_DEVICE_ID_NVIDIA_NVENET_20 0x0450 -#define PCI_DEVICE_ID_NVIDIA_NVENET_21 0x0451 -#define PCI_DEVICE_ID_NVIDIA_NVENET_22 0x0452 -#define PCI_DEVICE_ID_NVIDIA_NVENET_23 0x0453 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_SMBUS 0x0542 -#define PCI_DEVICE_ID_NVIDIA_NVENET_24 0x054C -#define PCI_DEVICE_ID_NVIDIA_NVENET_25 0x054D -#define PCI_DEVICE_ID_NVIDIA_NVENET_26 0x054E -#define PCI_DEVICE_ID_NVIDIA_NVENET_27 0x054F -#define PCI_DEVICE_ID_NVIDIA_NVENET_28 0x07DC -#define PCI_DEVICE_ID_NVIDIA_NVENET_29 0x07DD -#define PCI_DEVICE_ID_NVIDIA_NVENET_30 0x07DE -#define PCI_DEVICE_ID_NVIDIA_NVENET_31 0x07DF #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE 0x0560 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE 0x056C #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS 0x0752 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759 -#define PCI_DEVICE_ID_NVIDIA_NVENET_32 0x0760 -#define PCI_DEVICE_ID_NVIDIA_NVENET_33 0x0761 -#define PCI_DEVICE_ID_NVIDIA_NVENET_34 0x0762 -#define PCI_DEVICE_ID_NVIDIA_NVENET_35 0x0763 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS 0x07D8 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS 0x0AA2 -#define PCI_DEVICE_ID_NVIDIA_NVENET_36 0x0AB0 -#define PCI_DEVICE_ID_NVIDIA_NVENET_37 0x0AB1 -#define PCI_DEVICE_ID_NVIDIA_NVENET_38 0x0AB2 -#define PCI_DEVICE_ID_NVIDIA_NVENET_39 0x0AB3 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_TT128 0x9128 -- cgit v1.2.3-70-g09d2 From db429e9ec0f9dee2d8e50c154f04f29f880fc9d6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: partitions: add ->set_capacity block device method * Add ->set_capacity block device method and use it in rescan_partitions() to attempt enabling native capacity of the device upon detecting the partition which exceeds device capacity. * Add GENHD_FL_NATIVE_CAPACITY flag to try limit attempts of enabling native capacity during partition scan. Together with the consecutive patch implementing ->set_capacity method in ide-gd device driver this allows automatic disabling of Host Protected Area (HPA) if any partitions overlapping HPA are detected. Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Acked-by: Al Viro Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- fs/partitions/check.c | 43 ++++++++++++++++++++++++++++++++----------- include/linux/blkdev.h | 2 ++ include/linux/genhd.h | 1 + 3 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 137a708bb21..4bc2c43fa08 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -546,28 +546,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) /* add partitions */ for (p = 1; p < state->limit; p++) { - sector_t size = state->parts[p].size; - sector_t from = state->parts[p].from; + sector_t size, from; +try_scan: + size = state->parts[p].size; if (!size) continue; + + from = state->parts[p].from; if (from >= get_capacity(disk)) { printk(KERN_WARNING "%s: p%d ignored, start %llu is behind the end of the disk\n", disk->disk_name, p, (unsigned long long) from); continue; } + if (from + size > get_capacity(disk)) { - /* - * we can not ignore partitions of broken tables - * created by for example camera firmware, but we - * limit them to the end of the disk to avoid - * creating invalid block devices - */ + struct block_device_operations *bdops = disk->fops; + unsigned long long capacity; + printk(KERN_WARNING - "%s: p%d size %llu exceeds device capacity, " - "limited to end of disk\n", + "%s: p%d size %llu exceeds device capacity, ", disk->disk_name, p, (unsigned long long) size); - size = get_capacity(disk) - from; + + if (bdops->set_capacity && + (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { + printk(KERN_CONT "enabling native capacity\n"); + capacity = bdops->set_capacity(disk, ~0ULL); + disk->flags |= GENHD_FL_NATIVE_CAPACITY; + if (capacity > get_capacity(disk)) { + set_capacity(disk, capacity); + check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; + } + goto try_scan; + } else { + /* + * we can not ignore partitions of broken tables + * created by for example camera firmware, but + * we limit them to the end of the disk to avoid + * creating invalid block devices + */ + printk(KERN_CONT "limited to end of disk\n"); + size = get_capacity(disk) - from; + } } part = add_partition(disk, p, from, size, state->parts[p].flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6f841fb1be3..a2d7298be35 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1106,6 +1106,8 @@ struct block_device_operations { int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); + unsigned long long (*set_capacity) (struct gendisk *, + unsigned long long); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); struct module *owner; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 634c53028fb..239e24b081a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -113,6 +113,7 @@ struct hd_struct { #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ +#define GENHD_FL_NATIVE_CAPACITY 128 #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) -- cgit v1.2.3-70-g09d2 From e957b60d1583022a0f7c03267d37fcae2ddb78b1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: ide-gd: implement block device ->set_capacity method (v2) * Use ->probed_capacity to store native device capacity for ATA disks. * Add ->set_capacity method to struct ide_disk_ops. * Implement disk device ->set_capacity method for ATA disks. * Implement block device ->set_capacity method. v2: * Check if LBA and HPA are supported in ide_disk_set_capacity(). * According to the spec the SET MAX ADDRESS command shall be immediately preceded by a READ NATIVE MAX ADDRESS command. * Add ide_disk_hpa_{get_native,set}_capacity() helpers. Together with the previous patch adding ->set_capacity block device method this allows automatic disabling of Host Protected Area (HPA) if any partitions overlapping HPA are detected. Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Cc: Al Viro Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-disk.c | 67 +++++++++++++++++++++++++++++++++++++++++--------- drivers/ide/ide-gd.c | 14 +++++++++++ include/linux/ide.h | 4 +-- 3 files changed, 72 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a9fbe2c3121..61a6d354622 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -302,14 +302,12 @@ static const struct drive_list_entry hpa_list[] = { { NULL, NULL } }; -static void idedisk_check_hpa(ide_drive_t *drive) +static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48) { - unsigned long long capacity, set_max; - int lba48 = ata_id_lba48_enabled(drive->id); + u64 capacity, set_max; capacity = drive->capacity64; - - set_max = idedisk_read_native_max_address(drive, lba48); + set_max = idedisk_read_native_max_address(drive, lba48); if (ide_in_drive_list(drive->id, hpa_list)) { /* @@ -320,9 +318,31 @@ static void idedisk_check_hpa(ide_drive_t *drive) set_max--; } + return set_max; +} + +static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48) +{ + set_max = idedisk_set_max_address(drive, set_max, lba48); + if (set_max) + drive->capacity64 = set_max; + + return set_max; +} + +static void idedisk_check_hpa(ide_drive_t *drive) +{ + u64 capacity, set_max; + int lba48 = ata_id_lba48_enabled(drive->id); + + capacity = drive->capacity64; + set_max = ide_disk_hpa_get_native_capacity(drive, lba48); + if (set_max <= capacity) return; + drive->probed_capacity = set_max; + printk(KERN_INFO "%s: Host Protected Area detected.\n" "\tcurrent capacity is %llu sectors (%llu MB)\n" "\tnative capacity is %llu sectors (%llu MB)\n", @@ -330,13 +350,10 @@ static void idedisk_check_hpa(ide_drive_t *drive) capacity, sectors_to_MB(capacity), set_max, sectors_to_MB(set_max)); - set_max = idedisk_set_max_address(drive, set_max, lba48); - - if (set_max) { - drive->capacity64 = set_max; + set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48); + if (set_max) printk(KERN_INFO "%s: Host Protected Area disabled.\n", drive->name); - } } static int ide_disk_get_capacity(ide_drive_t *drive) @@ -358,6 +375,8 @@ static int ide_disk_get_capacity(ide_drive_t *drive) drive->capacity64 = drive->cyl * drive->head * drive->sect; } + drive->probed_capacity = drive->capacity64; + if (lba) { drive->dev_flags |= IDE_DFLAG_LBA; @@ -376,7 +395,7 @@ static int ide_disk_get_capacity(ide_drive_t *drive) "%llu sectors (%llu MB)\n", drive->name, (unsigned long long)drive->capacity64, sectors_to_MB(drive->capacity64)); - drive->capacity64 = 1ULL << 28; + drive->probed_capacity = drive->capacity64 = 1ULL << 28; } if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && @@ -392,6 +411,31 @@ static int ide_disk_get_capacity(ide_drive_t *drive) return 0; } +static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity) +{ + u64 set = min(capacity, drive->probed_capacity); + u16 *id = drive->id; + int lba48 = ata_id_lba48_enabled(id); + + if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 || + ata_id_hpa_enabled(id) == 0) + goto out; + + /* + * according to the spec the SET MAX ADDRESS command shall be + * immediately preceded by a READ NATIVE MAX ADDRESS command + */ + capacity = ide_disk_hpa_get_native_capacity(drive, lba48); + if (capacity == 0) + goto out; + + set = ide_disk_hpa_set_capacity(drive, set, lba48); + if (set) + return set; +out: + return drive->capacity64; +} + static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) { ide_drive_t *drive = q->queuedata; @@ -741,6 +785,7 @@ static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk, const struct ide_disk_ops ide_ata_disk_ops = { .check = ide_disk_check, + .set_capacity = ide_disk_set_capacity, .get_capacity = ide_disk_get_capacity, .setup = ide_disk_setup, .flush = ide_disk_flush, diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 4b6b71e2cdf..214119026b3 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -287,6 +287,19 @@ static int ide_gd_media_changed(struct gendisk *disk) return ret; } +static unsigned long long ide_gd_set_capacity(struct gendisk *disk, + unsigned long long capacity) +{ + struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); + ide_drive_t *drive = idkp->drive; + const struct ide_disk_ops *disk_ops = drive->disk_ops; + + if (disk_ops->set_capacity) + return disk_ops->set_capacity(drive, capacity); + + return drive->capacity64; +} + static int ide_gd_revalidate_disk(struct gendisk *disk) { struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); @@ -315,6 +328,7 @@ static struct block_device_operations ide_gd_ops = { .locked_ioctl = ide_gd_ioctl, .getgeo = ide_gd_getgeo, .media_changed = ide_gd_media_changed, + .set_capacity = ide_gd_set_capacity, .revalidate_disk = ide_gd_revalidate_disk }; diff --git a/include/linux/ide.h b/include/linux/ide.h index 9fed365a598..e96ace12872 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -397,6 +397,7 @@ struct ide_drive_s; struct ide_disk_ops { int (*check)(struct ide_drive_s *, const char *); int (*get_capacity)(struct ide_drive_s *); + u64 (*set_capacity)(struct ide_drive_s *, u64); void (*setup)(struct ide_drive_s *); void (*flush)(struct ide_drive_s *); int (*init_media)(struct ide_drive_s *, struct gendisk *); @@ -568,8 +569,7 @@ struct ide_drive_s { unsigned int drive_data; /* used by set_pio_mode/dev_select() */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ - u64 probed_capacity;/* initial reported media capacity (ide-cd only currently) */ - + u64 probed_capacity;/* initial/native media capacity */ u64 capacity64; /* total number of sectors */ int lun; /* logical unit */ -- cgit v1.2.3-70-g09d2 From 075affcbe01d4d7cefcd0e30a98df1253bcf8d92 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 13:52:52 +0200 Subject: ide: preserve Host Protected Area by default (v2) From the perspective of most users of recent systems, disabling Host Protected Area (HPA) can break vendor RAID formats, GPT partitions and risks corrupting firmware or overwriting vendor system recovery tools. Unfortunately the original (kernels < 2.6.30) behavior (unconditionally disabling HPA and using full disk capacity) was introduced at the time when the main use of HPA was to make the drive look small enough for the BIOS to allow the system to boot with large capacity drives. Thus to allow the maximum compatibility with the existing setups (using HPA and partitioned with HPA disabled) we automically disable HPA if any partitions overlapping HPA are detected. Additionally HPA can also be disabled using the "nohpa" module parameter (i.e. "ide_core.nohpa=0.0" to disable HPA on /dev/hda). v2: Fix ->resume HPA support. While at it: - remove stale "idebus=" entry from Documentation/kernel-parameters.txt Cc: Robert Hancock Cc: Frans Pop Cc: "Andries E. Brouwer" Cc: Al Viro Acked-by: Sergei Shtylyov [patch description was based on input from Alan Cox and Frans Pop] Emphatically-Acked-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/ide/ide.txt | 2 ++ Documentation/kernel-parameters.txt | 7 ++----- drivers/ide/ide-disk.c | 8 +++++++- drivers/ide/ide.c | 10 ++++++++++ include/linux/ide.h | 2 ++ 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt index 0c78f4b1d9d..e77bebfa7b0 100644 --- a/Documentation/ide/ide.txt +++ b/Documentation/ide/ide.txt @@ -216,6 +216,8 @@ Other kernel parameters for ide_core are: * "noflush=[interface_number.device_number]" to disable flush requests +* "nohpa=[interface_number.device_number]" to disable Host Protected Area + * "noprobe=[interface_number.device_number]" to skip probing * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a19f021f081..e58c91ca802 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -835,11 +835,8 @@ and is between 256 and 4096 characters. It is defined in the file ide-core.nodma= [HW] (E)IDE subsystem Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc - .vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom - .chs .ignore_cable are additional options - See Documentation/ide/ide.txt. - - idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed + .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr + .cdrom .chs .ignore_cable are additional options See Documentation/ide/ide.txt. ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 61a6d354622..3d92c9d54d4 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -350,6 +350,9 @@ static void idedisk_check_hpa(ide_drive_t *drive) capacity, sectors_to_MB(capacity), set_max, sectors_to_MB(set_max)); + if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0) + return; + set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48); if (set_max) printk(KERN_INFO "%s: Host Protected Area disabled.\n", @@ -430,8 +433,11 @@ static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity) goto out; set = ide_disk_hpa_set_capacity(drive, set, lba48); - if (set) + if (set) { + /* needed for ->resume to disable HPA */ + drive->dev_flags |= IDE_DFLAG_NOHPA; return set; + } out: return drive->capacity64; } diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 92c9b90931e..16d056939f9 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -211,6 +211,11 @@ static unsigned int ide_noflush; module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0); MODULE_PARM_DESC(noflush, "disable flush requests for a device"); +static unsigned int ide_nohpa; + +module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0); +MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device"); + static unsigned int ide_noprobe; module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0); @@ -281,6 +286,11 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit) drive->name); drive->dev_flags |= IDE_DFLAG_NOFLUSH; } + if (ide_nohpa & (1 << i)) { + printk(KERN_INFO "ide: disabling Host Protected Area for %s\n", + drive->name); + drive->dev_flags |= IDE_DFLAG_NOHPA; + } if (ide_noprobe & (1 << i)) { printk(KERN_INFO "ide: skipping probe for %s\n", drive->name); drive->dev_flags |= IDE_DFLAG_NOPROBE; diff --git a/include/linux/ide.h b/include/linux/ide.h index e96ace12872..45dce3b4c88 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -475,6 +475,8 @@ enum { IDE_DFLAG_NICE1 = (1 << 5), /* device is physically present */ IDE_DFLAG_PRESENT = (1 << 6), + /* disable Host Protected Area */ + IDE_DFLAG_NOHPA = (1 << 7), /* id read from device (synthetic if not set) */ IDE_DFLAG_ID_READ = (1 << 8), IDE_DFLAG_NOPROBE = (1 << 9), -- cgit v1.2.3-70-g09d2 From 8bc1e5aa06a2a9a425c4a6795fc564cba1521487 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 15:37:09 +0200 Subject: ide: respect quirk_drives[] list on all controllers * Add ide_check_nien_quirk_list() helper to the core code and then use it in ide_port_tune_devices(). * Remove no longer needed ->quirkproc methods from hpt366.c and pdc202xx_{new,old}.c. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/hpt366.c | 27 --------------------------- drivers/ide/ide-iops.c | 25 +++++++++++++++++++++++++ drivers/ide/ide-probe.c | 2 ++ drivers/ide/pdc202xx_new.c | 26 -------------------------- drivers/ide/pdc202xx_old.c | 27 --------------------------- include/linux/ide.h | 1 + 6 files changed, 28 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index cb04523e31c..a2e9f6c65a9 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -138,18 +138,6 @@ #undef HPT_RESET_STATE_ENGINE #undef HPT_DELAY_INTERRUPT -static const char *quirk_drives[] = { - "QUANTUM FIREBALLlct08 08", - "QUANTUM FIREBALLP KA6.4", - "QUANTUM FIREBALLP KA9.1", - "QUANTUM FIREBALLP KX13.6", - "QUANTUM FIREBALLP KX20.5", - "QUANTUM FIREBALLP KX27.3", - "QUANTUM FIREBALLP LM20.4", - "QUANTUM FIREBALLP LM20.5", - NULL -}; - static const char *bad_ata100_5[] = { "IBM-DTLA-307075", "IBM-DTLA-307060", @@ -733,20 +721,6 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio) hpt3xx_set_mode(drive, XFER_PIO_0 + pio); } -static void hpt3xx_quirkproc(ide_drive_t *drive) -{ - char *m = (char *)&drive->id[ATA_ID_PROD]; - const char **list = quirk_drives; - - while (*list) - if (strstr(m, *list++)) { - drive->quirk_list = 2; - return; - } - - drive->quirk_list = 0; -} - static void hpt3xx_maskproc(ide_drive_t *drive, int mask) { ide_hwif_t *hwif = drive->hwif; @@ -1408,7 +1382,6 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2) static const struct ide_port_ops hpt3xx_port_ops = { .set_pio_mode = hpt3xx_set_pio_mode, .set_dma_mode = hpt3xx_set_mode, - .quirkproc = hpt3xx_quirkproc, .maskproc = hpt3xx_maskproc, .mdma_filter = hpt3xx_mdma_filter, .udma_filter = hpt3xx_udma_filter, diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 8dff623f9da..c55349537c2 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -282,6 +282,31 @@ no_80w: return 0; } +static const char *nien_quirk_list[] = { + "QUANTUM FIREBALLlct08 08", + "QUANTUM FIREBALLP KA6.4", + "QUANTUM FIREBALLP KA9.1", + "QUANTUM FIREBALLP KX13.6", + "QUANTUM FIREBALLP KX20.5", + "QUANTUM FIREBALLP KX27.3", + "QUANTUM FIREBALLP LM20.4", + "QUANTUM FIREBALLP LM20.5", + NULL +}; + +void ide_check_nien_quirk_list(ide_drive_t *drive) +{ + const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; + + for (list = nien_quirk_list; *list != NULL; list++) + if (strstr(m, *list) != NULL) { + drive->quirk_list = 2; + return; + } + + drive->quirk_list = 0; +} + int ide_driveid_update(ide_drive_t *drive) { u16 *id; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 89574b0bd56..28f95cb41c2 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -732,6 +732,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) int i; ide_port_for_each_present_dev(i, drive, hwif) { + ide_check_nien_quirk_list(drive); + if (port_ops && port_ops->quirkproc) port_ops->quirkproc(drive); } diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index b68906c3c17..65ba8239e7b 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -40,18 +40,6 @@ #define DBG(fmt, args...) #endif -static const char *pdc_quirk_drives[] = { - "QUANTUM FIREBALLlct08 08", - "QUANTUM FIREBALLP KA6.4", - "QUANTUM FIREBALLP KA9.1", - "QUANTUM FIREBALLP LM20.4", - "QUANTUM FIREBALLP KX13.6", - "QUANTUM FIREBALLP KX20.5", - "QUANTUM FIREBALLP KX27.3", - "QUANTUM FIREBALLP LM20.5", - NULL -}; - static u8 max_dma_rate(struct pci_dev *pdev) { u8 mode; @@ -200,19 +188,6 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif) return ATA_CBL_PATA80; } -static void pdcnew_quirkproc(ide_drive_t *drive) -{ - const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; - - for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(m, *list) != NULL) { - drive->quirk_list = 2; - return; - } - - drive->quirk_list = 0; -} - static void pdcnew_reset(ide_drive_t *drive) { /* @@ -473,7 +448,6 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev) static const struct ide_port_ops pdcnew_port_ops = { .set_pio_mode = pdcnew_set_pio_mode, .set_dma_mode = pdcnew_set_dma_mode, - .quirkproc = pdcnew_quirkproc, .resetproc = pdcnew_reset, .cable_detect = pdcnew_cable_detect, }; diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 4980dd7b2e2..fe01db679a3 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -23,18 +23,6 @@ #define PDC202XX_DEBUG_DRIVE_INFO 0 -static const char *pdc_quirk_drives[] = { - "QUANTUM FIREBALLlct08 08", - "QUANTUM FIREBALLP KA6.4", - "QUANTUM FIREBALLP KA9.1", - "QUANTUM FIREBALLP LM20.4", - "QUANTUM FIREBALLP KX13.6", - "QUANTUM FIREBALLP KX20.5", - "QUANTUM FIREBALLP KX27.3", - "QUANTUM FIREBALLP LM20.5", - NULL -}; - static void pdc_old_disable_66MHz_clock(ide_hwif_t *); static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) @@ -151,19 +139,6 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif) outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg); } -static void pdc202xx_quirkproc(ide_drive_t *drive) -{ - const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; - - for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(m, *list) != NULL) { - drive->quirk_list = 2; - return; - } - - drive->quirk_list = 0; -} - static void pdc202xx_dma_start(ide_drive_t *drive) { if (drive->current_speed > XFER_UDMA_2) @@ -256,13 +231,11 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev, static const struct ide_port_ops pdc20246_port_ops = { .set_pio_mode = pdc202xx_set_pio_mode, .set_dma_mode = pdc202xx_set_mode, - .quirkproc = pdc202xx_quirkproc, }; static const struct ide_port_ops pdc2026x_port_ops = { .set_pio_mode = pdc202xx_set_pio_mode, .set_dma_mode = pdc202xx_set_mode, - .quirkproc = pdc202xx_quirkproc, .cable_detect = pdc2026x_cable_detect, }; diff --git a/include/linux/ide.h b/include/linux/ide.h index c8f7b967371..6caaae0c774 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1453,6 +1453,7 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); +void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); -- cgit v1.2.3-70-g09d2 From 734affdcae20af4fec95e46a64fb29f063a15c19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Sun, 7 Jun 2009 15:37:10 +0200 Subject: ide: add IDE_DFLAG_NIEN_QUIRK device flag Add IDE_DFLAG_NIEN_QUIRK device flag and use it instead of drive->quirk_list. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/hpt366.c | 2 +- drivers/ide/ide-eh.c | 5 +++-- drivers/ide/ide-io.c | 8 ++++++-- drivers/ide/ide-iops.c | 6 ++---- include/linux/ide.h | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index a2e9f6c65a9..7ce68ef6b90 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -727,7 +727,7 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask) struct pci_dev *dev = to_pci_dev(hwif->dev); struct hpt_info *info = hpt3xx_get_info(hwif->dev); - if (drive->quirk_list == 0) + if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0) return; if (info->chip_type >= HPT370) { diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 39d589254d4..2b914197961 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -407,8 +407,9 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) /* more than enough time */ udelay(10); /* clear SRST, leave nIEN (unless device is on the quirk list) */ - tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) | - ATA_DEVCTL_OBS); + tp_ops->write_devctl(hwif, + ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) | + ATA_DEVCTL_OBS); /* more than enough time */ udelay(10); hwif->poll_timeout = jiffies + WAIT_WORSTCASE; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 9654bd34cf5..243cf6561e7 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -488,11 +488,15 @@ repeat: if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) && hwif != prev_port) { + ide_drive_t *cur_dev = + prev_port ? prev_port->cur_dev : NULL; + /* * set nIEN for previous port, drives in the - * quirk_list may not like intr setups/cleanups + * quirk list may not like intr setups/cleanups */ - if (prev_port && prev_port->cur_dev->quirk_list == 0) + if (cur_dev && + (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0) prev_port->tp_ops->write_devctl(prev_port, ATA_NIEN | ATA_DEVCTL_OBS); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index c55349537c2..fa047150a1c 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -300,11 +300,9 @@ void ide_check_nien_quirk_list(ide_drive_t *drive) for (list = nien_quirk_list; *list != NULL; list++) if (strstr(m, *list) != NULL) { - drive->quirk_list = 2; + drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK; return; } - - drive->quirk_list = 0; } int ide_driveid_update(ide_drive_t *drive) @@ -389,7 +387,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); - if (drive->quirk_list == 2) + if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); error = __ide_wait_stat(drive, drive->ready_stat, diff --git a/include/linux/ide.h b/include/linux/ide.h index 6caaae0c774..a6c6a2fad7c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -499,6 +499,7 @@ enum { /* write protect */ IDE_DFLAG_WP = (1 << 29), IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 30), + IDE_DFLAG_NIEN_QUIRK = (1 << 31), }; struct ide_drive_s { @@ -530,7 +531,6 @@ struct ide_drive_s { u8 waiting_for_dma; /* dma currently in progress */ u8 dma; /* atapi dma flag */ - u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ -- cgit v1.2.3-70-g09d2 From eae3f29cc73f83cc3f1891d3ad40021b5172c630 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Jun 2009 04:03:35 +0000 Subject: net: num_dma_maps is not used Get rid of num_dma_maps in struct skb_shared_info, as it seems unused. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 --- net/core/skb_dma_map.c | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7305da92be8..7485058125e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -194,9 +194,6 @@ struct skb_shared_info { unsigned short gso_type; __be32 ip6_frag_id; union skb_shared_tx tx_flags; -#ifdef CONFIG_HAS_DMA - unsigned int num_dma_maps; -#endif struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; skb_frag_t frags[MAX_SKB_FRAGS]; diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c index 86234923a3b..7adb623ef66 100644 --- a/net/core/skb_dma_map.c +++ b/net/core/skb_dma_map.c @@ -30,7 +30,6 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, goto unwind; sp->dma_maps[i + 1] = map; } - sp->num_dma_maps = i + 1; return 0; -- cgit v1.2.3-70-g09d2 From 042a53a9e437feaf2230dd2cadcecfae9c7bfe05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Jun 2009 04:04:16 +0000 Subject: net: skb_shared_info optimization skb_dma_unmap() is quite expensive for small packets, because we use two different cache lines from skb_shared_info. One to access nr_frags, one to access dma_maps[0] Instead of dma_maps being an array of MAX_SKB_FRAGS + 1 elements, let dma_head alone in a new dma_head field, close to nr_frags, to reduce cache lines misses. Tested on my dev machine (bnx2 & tg3 adapters), nice speedup ! Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 6 +++--- drivers/net/e1000/e1000_main.c | 4 ++-- drivers/net/e1000e/netdev.c | 4 ++-- drivers/net/igb/igb_main.c | 5 ++--- drivers/net/igbvf/netdev.c | 5 ++--- drivers/net/ixgb/ixgb_main.c | 4 ++-- drivers/net/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/tg3.c | 10 +++++----- include/linux/skbuff.h | 5 ++++- net/core/skb_dma_map.c | 12 ++++++------ 10 files changed, 30 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index f53017250e0..391d2d47089 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -5487,7 +5487,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) dev_kfree_skb(skb); return -EIO; } - map = skb_shinfo(skb)->dma_maps[0]; + map = skb_shinfo(skb)->dma_head; REG_WR(bp, BNX2_HC_COMMAND, bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT); @@ -6167,7 +6167,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) } sp = skb_shinfo(skb); - mapping = sp->dma_maps[0]; + mapping = sp->dma_head; tx_buf = &txr->tx_buf_ring[ring_prod]; tx_buf->skb = skb; @@ -6191,7 +6191,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) txbd = &txr->tx_desc_ring[ring_prod]; len = frag->size; - mapping = sp->dma_maps[i + 1]; + mapping = sp->dma_maps[i]; txbd->tx_bd_haddr_hi = (u64) mapping >> 32; txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 05e87a59f1c..8d36743c814 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2998,7 +2998,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, size -= 4; buffer_info->length = size; - buffer_info->dma = map[0] + offset; + buffer_info->dma = skb_shinfo(skb)->dma_head + offset; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; @@ -3039,7 +3039,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, size -= 4; buffer_info->length = size; - buffer_info->dma = map[f + 1] + offset; + buffer_info->dma = map[f] + offset; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 38694c79edc..9043f1b845f 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -3916,7 +3916,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, buffer_info->length = size; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = map[0] + offset; + buffer_info->dma = skb_shinfo(skb)->dma_head + offset; count++; len -= size; @@ -3947,7 +3947,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, buffer_info->length = size; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = map[f + 1] + offset; + buffer_info->dma = map[f] + offset; len -= size; offset += size; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 958b2879da4..ea17319624a 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -3139,8 +3139,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = map[count]; - count++; + buffer_info->dma = skb_shinfo(skb)->dma_head; for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { struct skb_frag_struct *frag; @@ -3164,7 +3163,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[first].next_to_watch = i; - return count; + return count + 1; } static inline void igb_tx_queue_adv(struct igb_adapter *adapter, diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index 5f7ba1a4990..22aadb7884f 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -2119,8 +2119,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = map[count]; - count++; + buffer_info->dma = skb_shinfo(skb)->dma_head; for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { struct skb_frag_struct *frag; @@ -2144,7 +2143,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[first].next_to_watch = i; - return count; + return count + 1; } static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter, diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 6eb7f37a113..9c897cf86b9 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1300,7 +1300,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, buffer_info->length = size; WARN_ON(buffer_info->dma != 0); buffer_info->time_stamp = jiffies; - buffer_info->dma = map[0] + offset; + buffer_info->dma = skb_shinfo(skb)->dma_head + offset; pci_map_single(adapter->pdev, skb->data + offset, size, @@ -1340,7 +1340,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, buffer_info->length = size; buffer_info->time_stamp = jiffies; - buffer_info->dma = map[f + 1] + offset; + buffer_info->dma = map[f] + offset; buffer_info->next_to_watch = 0; len -= size; diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index d36003cbb6d..09994e920d5 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4837,7 +4837,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter, size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); tx_buffer_info->length = size; - tx_buffer_info->dma = map[0] + offset; + tx_buffer_info->dma = skb_shinfo(skb)->dma_head + offset; tx_buffer_info->time_stamp = jiffies; tx_buffer_info->next_to_watch = i; @@ -4869,7 +4869,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter, size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); tx_buffer_info->length = size; - tx_buffer_info->dma = map[f + 1] + offset; + tx_buffer_info->dma = map[f] + offset; tx_buffer_info->time_stamp = jiffies; tx_buffer_info->next_to_watch = i; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index a39b534fb43..46a3f86125b 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -5021,7 +5021,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, /* New SKB is guaranteed to be linear. */ entry = *start; ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE); - new_addr = skb_shinfo(new_skb)->dma_maps[0]; + new_addr = skb_shinfo(new_skb)->dma_head; /* Make sure new skb does not cross any 4G boundaries. * Drop the packet if it does. @@ -5155,7 +5155,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) sp = skb_shinfo(skb); - mapping = sp->dma_maps[0]; + mapping = sp->dma_head; tp->tx_buffers[entry].skb = skb; @@ -5173,7 +5173,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; len = frag->size; - mapping = sp->dma_maps[i + 1]; + mapping = sp->dma_maps[i]; tp->tx_buffers[entry].skb = NULL; tg3_set_txd(tp, entry, mapping, len, @@ -5331,7 +5331,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) sp = skb_shinfo(skb); - mapping = sp->dma_maps[0]; + mapping = sp->dma_head; tp->tx_buffers[entry].skb = skb; @@ -5356,7 +5356,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; len = frag->size; - mapping = sp->dma_maps[i + 1]; + mapping = sp->dma_maps[i]; tp->tx_buffers[entry].skb = NULL; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7485058125e..aad484cd586 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -189,6 +189,9 @@ struct skb_shared_info { atomic_t dataref; unsigned short nr_frags; unsigned short gso_size; +#ifdef CONFIG_HAS_DMA + dma_addr_t dma_head; +#endif /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; unsigned short gso_type; @@ -198,7 +201,7 @@ struct skb_shared_info { struct skb_shared_hwtstamps hwtstamps; skb_frag_t frags[MAX_SKB_FRAGS]; #ifdef CONFIG_HAS_DMA - dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; + dma_addr_t dma_maps[MAX_SKB_FRAGS]; #endif /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c index 7adb623ef66..79687dfd695 100644 --- a/net/core/skb_dma_map.c +++ b/net/core/skb_dma_map.c @@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, if (dma_mapping_error(dev, map)) goto out_err; - sp->dma_maps[0] = map; + sp->dma_head = map; for (i = 0; i < sp->nr_frags; i++) { skb_frag_t *fp = &sp->frags[i]; @@ -28,7 +28,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, fp->size, dir); if (dma_mapping_error(dev, map)) goto unwind; - sp->dma_maps[i + 1] = map; + sp->dma_maps[i] = map; } return 0; @@ -37,10 +37,10 @@ unwind: while (--i >= 0) { skb_frag_t *fp = &sp->frags[i]; - dma_unmap_page(dev, sp->dma_maps[i + 1], + dma_unmap_page(dev, sp->dma_maps[i], fp->size, dir); } - dma_unmap_single(dev, sp->dma_maps[0], + dma_unmap_single(dev, sp->dma_head, skb_headlen(skb), dir); out_err: return -ENOMEM; @@ -53,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb, struct skb_shared_info *sp = skb_shinfo(skb); int i; - dma_unmap_single(dev, sp->dma_maps[0], + dma_unmap_single(dev, sp->dma_head, skb_headlen(skb), dir); for (i = 0; i < sp->nr_frags; i++) { skb_frag_t *fp = &sp->frags[i]; - dma_unmap_page(dev, sp->dma_maps[i + 1], + dma_unmap_page(dev, sp->dma_maps[i], fp->size, dir); } } -- cgit v1.2.3-70-g09d2 From 4e329972052c3649367b91de783f6293b8653cb2 Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Sun, 7 Jun 2009 09:09:23 +0000 Subject: isdn: rename capi_ctr_reseted() to capi_ctr_down() Change the name of the Kernel CAPI exported function capi_ctr_reseted() to something representing its purpose better. Impact: renaming, no functional change Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller --- Documentation/isdn/INTERFACE.CAPI | 4 ++-- drivers/isdn/capi/kcapi.c | 8 ++++---- drivers/isdn/hardware/avm/b1.c | 2 +- drivers/isdn/hardware/avm/b1dma.c | 2 +- drivers/isdn/hardware/avm/c4.c | 4 ++-- drivers/isdn/hardware/avm/t1isa.c | 2 +- drivers/isdn/hysdn/hycapi.c | 4 ++-- include/linux/isdn/capilli.h | 2 +- net/bluetooth/cmtp/capi.c | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 786d619b36e..46315381616 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI @@ -45,7 +45,7 @@ From then on, Kernel CAPI may call the registered callback functions for the device. If the device becomes unusable for any reason (shutdown, disconnect ...), the -driver has to call capi_ctr_reseted(). This will prevent further calls to the +driver has to call capi_ctr_down(). This will prevent further calls to the callback functions by Kernel CAPI. @@ -166,7 +166,7 @@ int detach_capi_ctr(struct capi_ctr *ctrlr) register/unregister a device (controller) with Kernel CAPI void capi_ctr_ready(struct capi_ctr *ctrlr) -void capi_ctr_reseted(struct capi_ctr *ctrlr) +void capi_ctr_down(struct capi_ctr *ctrlr) signal controller ready/not ready void capi_ctr_suspend_output(struct capi_ctr *ctrlr) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index f33170368cd..57d26360f64 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -377,14 +377,14 @@ void capi_ctr_ready(struct capi_ctr * card) EXPORT_SYMBOL(capi_ctr_ready); /** - * capi_ctr_reseted() - signal CAPI controller reset + * capi_ctr_down() - signal CAPI controller not ready * @card: controller descriptor structure. * * Called by hardware driver to signal that the controller is down and * unavailable for use. */ -void capi_ctr_reseted(struct capi_ctr * card) +void capi_ctr_down(struct capi_ctr * card) { u16 appl; @@ -413,7 +413,7 @@ void capi_ctr_reseted(struct capi_ctr * card) notify_push(KCI_CONTRDOWN, card->cnr, 0, 0); } -EXPORT_SYMBOL(capi_ctr_reseted); +EXPORT_SYMBOL(capi_ctr_down); /** * capi_ctr_suspend_output() - suspend controller @@ -517,7 +517,7 @@ EXPORT_SYMBOL(attach_capi_ctr); int detach_capi_ctr(struct capi_ctr *card) { if (card->cardstate != CARD_DETECTED) - capi_ctr_reseted(card); + capi_ctr_down(card); ncards--; diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index abf05ec3176..a7c0083e78a 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -330,7 +330,7 @@ void b1_reset_ctr(struct capi_ctr *ctrl) spin_lock_irqsave(&card->lock, flags); capilib_release(&cinfo->ncci_head); spin_unlock_irqrestore(&card->lock, flags); - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); } void b1_register_appl(struct capi_ctr *ctrl, diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index da34b98e3de..0e84aaae43f 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -759,7 +759,7 @@ void b1dma_reset_ctr(struct capi_ctr *ctrl) memset(cinfo->version, 0, sizeof(cinfo->version)); capilib_release(&cinfo->ncci_head); spin_unlock_irqrestore(&card->lock, flags); - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); } /* ------------------------------------------------------------- */ diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 9df1d3f66c8..6833301a45f 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -681,7 +681,7 @@ static irqreturn_t c4_handle_interrupt(avmcard *card) spin_lock_irqsave(&card->lock, flags); capilib_release(&cinfo->ncci_head); spin_unlock_irqrestore(&card->lock, flags); - capi_ctr_reseted(&cinfo->capi_ctrl); + capi_ctr_down(&cinfo->capi_ctrl); } card->nlogcontr = 0; return IRQ_HANDLED; @@ -909,7 +909,7 @@ static void c4_reset_ctr(struct capi_ctr *ctrl) for (i=0; i < card->nr_controllers; i++) { cinfo = &card->ctrlinfo[i]; memset(cinfo->version, 0, sizeof(cinfo->version)); - capi_ctr_reseted(&cinfo->capi_ctrl); + capi_ctr_down(&cinfo->capi_ctrl); } card->nlogcontr = 0; } diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c index e7724493738..1c53fd49adb 100644 --- a/drivers/isdn/hardware/avm/t1isa.c +++ b/drivers/isdn/hardware/avm/t1isa.c @@ -339,7 +339,7 @@ static void t1isa_reset_ctr(struct capi_ctr *ctrl) spin_lock_irqsave(&card->lock, flags); capilib_release(&cinfo->ncci_head); spin_unlock_irqrestore(&card->lock, flags); - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); } static void t1isa_remove(struct pci_dev *pdev) diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index 53f6ad1235d..4ffaa14b9fc 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -67,7 +67,7 @@ hycapi_reset_ctr(struct capi_ctr *ctrl) printk(KERN_NOTICE "HYCAPI hycapi_reset_ctr\n"); #endif capilib_release(&cinfo->ncci_head); - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); } /****************************** @@ -347,7 +347,7 @@ int hycapi_capi_stop(hysdn_card *card) if(cinfo) { ctrl = &cinfo->capi_ctrl; /* ctrl->suspend_output(ctrl); */ - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); } return 0; } diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h index 35e9b0fd014..7acb87a4487 100644 --- a/include/linux/isdn/capilli.h +++ b/include/linux/isdn/capilli.h @@ -79,7 +79,7 @@ int attach_capi_ctr(struct capi_ctr *); int detach_capi_ctr(struct capi_ctr *); void capi_ctr_ready(struct capi_ctr * card); -void capi_ctr_reseted(struct capi_ctr * card); +void capi_ctr_down(struct capi_ctr * card); void capi_ctr_suspend_output(struct capi_ctr * card); void capi_ctr_resume_output(struct capi_ctr * card); void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *skb); diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 78958c0f9a4..97f8d68d574 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -382,7 +382,7 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl) BT_DBG("ctrl %p", ctrl); - capi_ctr_reseted(ctrl); + capi_ctr_down(ctrl); atomic_inc(&session->terminate); cmtp_schedule(session); -- cgit v1.2.3-70-g09d2 From 8db4dc46dcff7568896aa1eae4bd07620ce3dd93 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 20 Apr 2009 01:31:05 -0300 Subject: Bluetooth: Use macros for L2CAP channel identifiers Use macros instead of hardcoded numbers to make the L2CAP source code more readable. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 6 ++++++ net/bluetooth/l2cap.c | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index f566aa1f0a4..ed4ba913d94 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -106,6 +106,12 @@ struct l2cap_conn_rsp { __le16 status; } __attribute__ ((packed)); +/* channel indentifier */ +#define L2CAP_CID_SIGNALING 0x0001 +#define L2CAP_CID_CONN_LESS 0x0002 +#define L2CAP_CID_DYN_START 0x0040 +#define L2CAP_CID_DYN_END 0xffff + /* connect result */ #define L2CAP_CR_SUCCESS 0x0000 #define L2CAP_CR_PEND 0x0001 diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ca4d3b40d5c..ff1744e34cd 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -161,9 +161,9 @@ static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) { - u16 cid = 0x0040; + u16 cid = L2CAP_CID_DYN_START; - for (; cid < 0xffff; cid++) { + for (; cid < L2CAP_CID_DYN_END; cid++) { if(!__l2cap_get_chan_by_scid(l, cid)) return cid; } @@ -215,13 +215,13 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so l2cap_pi(sk)->scid = l2cap_alloc_cid(l); } else if (sk->sk_type == SOCK_DGRAM) { /* Connectionless socket */ - l2cap_pi(sk)->scid = 0x0002; - l2cap_pi(sk)->dcid = 0x0002; + l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; + l2cap_pi(sk)->dcid = L2CAP_CID_CONN_LESS; l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; } else { /* Raw socket can send/recv signalling messages only */ - l2cap_pi(sk)->scid = 0x0001; - l2cap_pi(sk)->dcid = 0x0001; + l2cap_pi(sk)->scid = L2CAP_CID_SIGNALING; + l2cap_pi(sk)->dcid = L2CAP_CID_SIGNALING; l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; } @@ -1598,7 +1598,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); - lh->cid = cpu_to_le16(0x0001); + lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; @@ -2420,11 +2420,11 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { - case 0x0001: + case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; - case 0x0002: + case L2CAP_CID_CONN_LESS: psm = get_unaligned((__le16 *) skb->data); skb_pull(skb, 2); l2cap_conless_channel(conn, psm, skb); -- cgit v1.2.3-70-g09d2 From 589d27464846c7cb758f93d9ee380c8ff05a161c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 20 Apr 2009 01:31:07 -0300 Subject: Bluetooth: Use macro for L2CAP hint mask on receiving config request Using the L2CAP_CONF_HINT macro is easier to understand than using a hardcoded 0x80 value. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 ++ net/bluetooth/l2cap.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ed4ba913d94..300b63f0eec 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -149,6 +149,8 @@ struct l2cap_conf_opt { } __attribute__ ((packed)); #define L2CAP_CONF_OPT_SIZE 2 +#define L2CAP_CONF_HINT 0x80 + #define L2CAP_CONF_MTU 0x01 #define L2CAP_CONF_FLUSH_TO 0x02 #define L2CAP_CONF_QOS 0x03 diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ff1744e34cd..c0c091533bd 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1739,7 +1739,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); - hint = type & 0x80; + hint = type & L2CAP_CONF_HINT; type &= 0x7f; switch (type) { -- cgit v1.2.3-70-g09d2 From 47ec1dcd696d56a7c396e5838516a566ecd4b03d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 May 2009 18:57:55 -0700 Subject: Bluetooth: Add basic constants for L2CAP ERTM support and use them This adds the basic constants required to add support for L2CAP Enhanced Retransmission feature. Based on a patch from Nathan Holstein Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 63 ++++++++++++++++++++++++++++--------------- net/bluetooth/l2cap.c | 6 ++--- 2 files changed, 45 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 300b63f0eec..e919fca1072 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -26,8 +26,13 @@ #define __L2CAP_H /* L2CAP defaults */ -#define L2CAP_DEFAULT_MTU 672 -#define L2CAP_DEFAULT_FLUSH_TO 0xFFFF +#define L2CAP_DEFAULT_MTU 672 +#define L2CAP_DEFAULT_FLUSH_TO 0xffff +#define L2CAP_DEFAULT_RX_WINDOW 1 +#define L2CAP_DEFAULT_MAX_RECEIVE 1 +#define L2CAP_DEFAULT_RETRANS_TO 300 /* 300 milliseconds */ +#define L2CAP_DEFAULT_MONITOR_TO 1000 /* 1 second */ +#define L2CAP_DEFAULT_MAX_RX_APDU 0xfff7 #define L2CAP_CONN_TIMEOUT (40000) /* 40 seconds */ #define L2CAP_INFO_TIMEOUT (4000) /* 4 seconds */ @@ -64,17 +69,29 @@ struct l2cap_conninfo { #define L2CAP_LM_SECURE 0x0020 /* L2CAP command codes */ -#define L2CAP_COMMAND_REJ 0x01 -#define L2CAP_CONN_REQ 0x02 -#define L2CAP_CONN_RSP 0x03 -#define L2CAP_CONF_REQ 0x04 -#define L2CAP_CONF_RSP 0x05 -#define L2CAP_DISCONN_REQ 0x06 -#define L2CAP_DISCONN_RSP 0x07 -#define L2CAP_ECHO_REQ 0x08 -#define L2CAP_ECHO_RSP 0x09 -#define L2CAP_INFO_REQ 0x0a -#define L2CAP_INFO_RSP 0x0b +#define L2CAP_COMMAND_REJ 0x01 +#define L2CAP_CONN_REQ 0x02 +#define L2CAP_CONN_RSP 0x03 +#define L2CAP_CONF_REQ 0x04 +#define L2CAP_CONF_RSP 0x05 +#define L2CAP_DISCONN_REQ 0x06 +#define L2CAP_DISCONN_RSP 0x07 +#define L2CAP_ECHO_REQ 0x08 +#define L2CAP_ECHO_RSP 0x09 +#define L2CAP_INFO_REQ 0x0a +#define L2CAP_INFO_RSP 0x0b + +/* L2CAP feature mask */ +#define L2CAP_FEAT_FLOWCTL 0x00000001 +#define L2CAP_FEAT_RETRANS 0x00000002 +#define L2CAP_FEAT_ERTM 0x00000008 +#define L2CAP_FEAT_STREAMING 0x00000010 +#define L2CAP_FEAT_FCS 0x00000020 +#define L2CAP_FEAT_FIXED_CHAN 0x00000080 + +/* L2CAP checksum option */ +#define L2CAP_FCS_NONE 0x00 +#define L2CAP_FCS_CRC16 0x01 /* L2CAP structures */ struct l2cap_hdr { @@ -113,16 +130,16 @@ struct l2cap_conn_rsp { #define L2CAP_CID_DYN_END 0xffff /* connect result */ -#define L2CAP_CR_SUCCESS 0x0000 -#define L2CAP_CR_PEND 0x0001 -#define L2CAP_CR_BAD_PSM 0x0002 -#define L2CAP_CR_SEC_BLOCK 0x0003 -#define L2CAP_CR_NO_MEM 0x0004 +#define L2CAP_CR_SUCCESS 0x0000 +#define L2CAP_CR_PEND 0x0001 +#define L2CAP_CR_BAD_PSM 0x0002 +#define L2CAP_CR_SEC_BLOCK 0x0003 +#define L2CAP_CR_NO_MEM 0x0004 /* connect status */ -#define L2CAP_CS_NO_INFO 0x0000 -#define L2CAP_CS_AUTHEN_PEND 0x0001 -#define L2CAP_CS_AUTHOR_PEND 0x0002 +#define L2CAP_CS_NO_INFO 0x0000 +#define L2CAP_CS_AUTHEN_PEND 0x0001 +#define L2CAP_CS_AUTHOR_PEND 0x0002 struct l2cap_conf_req { __le16 dcid; @@ -150,11 +167,13 @@ struct l2cap_conf_opt { #define L2CAP_CONF_OPT_SIZE 2 #define L2CAP_CONF_HINT 0x80 +#define L2CAP_CONF_MASK 0x7f #define L2CAP_CONF_MTU 0x01 #define L2CAP_CONF_FLUSH_TO 0x02 #define L2CAP_CONF_QOS 0x03 #define L2CAP_CONF_RFC 0x04 +#define L2CAP_CONF_FCS 0x05 #define L2CAP_CONF_MAX_SIZE 22 @@ -170,6 +189,8 @@ struct l2cap_conf_rfc { #define L2CAP_MODE_BASIC 0x00 #define L2CAP_MODE_RETRANS 0x01 #define L2CAP_MODE_FLOWCTL 0x02 +#define L2CAP_MODE_ERTM 0x03 +#define L2CAP_MODE_STREAM 0x04 struct l2cap_disconn_req { __le16 dcid; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f6a82f203fd..338295007c4 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -52,7 +52,7 @@ #define VERSION "2.13" -static u32 l2cap_feat_mask = 0x0080; +static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; static const struct proto_ops l2cap_sock_ops; @@ -1747,7 +1747,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) len -= l2cap_get_conf_opt(&req, &type, &olen, &val); hint = type & L2CAP_CONF_HINT; - type &= 0x7f; + type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: @@ -2244,7 +2244,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm if (type == L2CAP_IT_FEAT_MASK) { conn->feat_mask = get_unaligned_le32(rsp->data); - if (conn->feat_mask & 0x0080) { + if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { struct l2cap_info_req req; req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); -- cgit v1.2.3-70-g09d2 From b4324b5dc5b62ba76372e1bf8927230cf744df66 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 7 Jun 2009 18:06:51 +0200 Subject: Bluetooth: Remove pointless endian conversion helpers The Bluetooth source uses some endian conversion helpers, that in the end translate to kernel standard routines. So remove this obfuscation since it is fully pointless. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 6 ------ net/bluetooth/l2cap.c | 19 ++++++++++--------- net/bluetooth/rfcomm/core.c | 12 ++++++------ 3 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3ad5390a4dd..cf8bcb25533 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -81,12 +81,6 @@ enum { BT_CLOSED }; -/* Endianness conversions */ -#define htobs(a) __cpu_to_le16(a) -#define htobl(a) __cpu_to_le32(a) -#define btohs(a) __le16_to_cpu(a) -#define btohl(a) __le32_to_cpu(a) - /* BD Address */ typedef struct { __u8 b[6]; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 338295007c4..bd0a4c1bced 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -812,7 +812,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) goto done; } - if (la.l2_psm && btohs(la.l2_psm) < 0x1001 && + if (la.l2_psm && __le16_to_cpu(la.l2_psm) < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { err = -EACCES; goto done; @@ -829,7 +829,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) l2cap_pi(sk)->sport = la.l2_psm; sk->sk_state = BT_BOUND; - if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003) + if (__le16_to_cpu(la.l2_psm) == 0x0001 || + __le16_to_cpu(la.l2_psm) == 0x0003) l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } @@ -1015,9 +1016,9 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) write_lock_bh(&l2cap_sk_list.lock); for (psm = 0x1001; psm < 0x1100; psm += 2) - if (!__l2cap_get_sock_by_addr(htobs(psm), src)) { - l2cap_pi(sk)->psm = htobs(psm); - l2cap_pi(sk)->sport = htobs(psm); + if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { + l2cap_pi(sk)->psm = cpu_to_le16(psm); + l2cap_pi(sk)->sport = cpu_to_le16(psm); err = 0; break; } @@ -1106,11 +1107,11 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l if (peer) { la->l2_psm = l2cap_pi(sk)->psm; bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); - la->l2_cid = htobs(l2cap_pi(sk)->dcid); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); } else { la->l2_psm = l2cap_pi(sk)->sport; bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); - la->l2_cid = htobs(l2cap_pi(sk)->scid); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); } return 0; @@ -2720,8 +2721,8 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, - pi->imtu, pi->omtu, pi->sec_level); + sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, + pi->dcid, pi->imtu, pi->omtu, pi->sec_level); } read_unlock_bh(&l2cap_sk_list.lock); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 374536e050a..e50566ebf9f 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -679,7 +679,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = htobs(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) @@ -852,9 +852,9 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d } if (cr && channel_mtu >= 0) - pn->mtu = htobs(channel_mtu); + pn->mtu = cpu_to_le16(channel_mtu); else - pn->mtu = htobs(d->mtu); + pn->mtu = cpu_to_le16(d->mtu); *ptr = __fcs(buf); ptr++; @@ -1056,7 +1056,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) if (len > 127) { hdr = (void *) skb_push(skb, 4); - put_unaligned(htobs(__len16(len)), (__le16 *) &hdr->len); + put_unaligned(cpu_to_le16(__len16(len)), (__le16 *) &hdr->len); } else { hdr = (void *) skb_push(skb, 3); hdr->len = __len8(len); @@ -1289,7 +1289,7 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn) d->priority = pn->priority; - d->mtu = btohs(pn->mtu); + d->mtu = __le16_to_cpu(pn->mtu); if (cr && d->mtu > s->mtu) d->mtu = s->mtu; @@ -1922,7 +1922,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = htobs(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { -- cgit v1.2.3-70-g09d2 From 611b30f74b5d8ca036a9923b3bf6e0ee10a21a53 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 8 Jun 2009 14:41:38 +0200 Subject: Bluetooth: Add native RFKILL soft-switch support for all devices With the re-write of the RFKILL subsystem it is now possible to easily integrate RFKILL soft-switch support into the Bluetooth subsystem. All Bluetooth devices will now get automatically RFKILL support. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_core.c | 41 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 73aead222b3..c4ca4228b08 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -137,6 +137,8 @@ struct hci_dev { struct device *parent; struct device dev; + struct rfkill *rfkill; + struct module *owner; int (*open)(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cd061510b6b..406ad07cdea 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -476,6 +477,11 @@ int hci_dev_open(__u16 dev) hci_req_lock(hdev); + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { + ret = -ERFKILL; + goto done; + } + if (test_bit(HCI_UP, &hdev->flags)) { ret = -EALREADY; goto done; @@ -813,6 +819,24 @@ int hci_get_dev_info(void __user *arg) /* ---- Interface to HCI drivers ---- */ +static int hci_rfkill_set_block(void *data, bool blocked) +{ + struct hci_dev *hdev = data; + + BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); + + if (!blocked) + return 0; + + hci_dev_do_close(hdev); + + return 0; +} + +static const struct rfkill_ops hci_rfkill_ops = { + .set_block = hci_rfkill_set_block, +}; + /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -844,7 +868,8 @@ int hci_register_dev(struct hci_dev *hdev) struct list_head *head = &hci_dev_list, *p; int i, id = 0; - BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner); + BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, + hdev->type, hdev->owner); if (!hdev->open || !hdev->close || !hdev->destruct) return -EINVAL; @@ -900,6 +925,15 @@ int hci_register_dev(struct hci_dev *hdev) hci_register_sysfs(hdev); + hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, + RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, hdev); + if (hdev->rfkill) { + if (rfkill_register(hdev->rfkill) < 0) { + rfkill_destroy(hdev->rfkill); + hdev->rfkill = NULL; + } + } + hci_notify(hdev, HCI_DEV_REG); return id; @@ -924,6 +958,11 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_UNREG); + if (hdev->rfkill) { + rfkill_unregister(hdev->rfkill); + rfkill_destroy(hdev->rfkill); + } + hci_unregister_sysfs(hdev); __hci_dev_put(hdev); -- cgit v1.2.3-70-g09d2 From f87fb666bb00a7afcbd7992d236e42ac544996f9 Mon Sep 17 00:00:00 2001 From: Jan Kasprzak Date: Mon, 8 Jun 2009 15:53:43 +0200 Subject: netfilter: nf_ct_icmp: keep the ICMP ct entries longer Current conntrack code kills the ICMP conntrack entry as soon as the first reply is received. This is incorrect, as we then see only the first ICMP echo reply out of several possible duplicates as ESTABLISHED, while the rest will be INVALID. Also this unnecessarily increases the conntrackd traffic on H-A firewalls. Make all the ICMP conntrack entries (including the replied ones) last for the default of nf_conntrack_icmp{,v6}_timeout seconds. Signed-off-by: Jan "Yenya" Kasprzak Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv4/nf_conntrack_icmp.h | 11 ----------- include/net/netfilter/ipv6/nf_conntrack_icmpv6.h | 7 ------- include/net/netfilter/nf_conntrack.h | 3 --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 16 ++++------------ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 16 ++++------------ 5 files changed, 8 insertions(+), 45 deletions(-) delete mode 100644 include/net/netfilter/ipv4/nf_conntrack_icmp.h (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_icmp.h b/include/net/netfilter/ipv4/nf_conntrack_icmp.h deleted file mode 100644 index 3dd22cff23e..00000000000 --- a/include/net/netfilter/ipv4/nf_conntrack_icmp.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _NF_CONNTRACK_ICMP_H -#define _NF_CONNTRACK_ICMP_H -/* ICMP tracking. */ -#include - -struct ip_ct_icmp -{ - /* Optimization: when number in == number out, forget immediately. */ - atomic_t count; -}; -#endif /* _NF_CONNTRACK_ICMP_H */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h index 86591afda29..67edd50a398 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h @@ -9,7 +9,6 @@ #ifndef _NF_CONNTRACK_ICMPV6_H #define _NF_CONNTRACK_ICMPV6_H -#include #ifndef ICMPV6_NI_QUERY #define ICMPV6_NI_QUERY 139 @@ -18,10 +17,4 @@ #define ICMPV6_NI_REPLY 140 #endif -struct nf_ct_icmpv6 -{ - /* Optimization: when number in == number out, forget immediately. */ - atomic_t count; -}; - #endif /* _NF_CONNTRACK_ICMPV6_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2ba36dd33ae..2b877374242 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -34,8 +33,6 @@ union nf_conntrack_proto { struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; - struct ip_ct_icmp icmp; - struct nf_ct_icmpv6 icmpv6; struct nf_ct_gre gre; }; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index c6ab3d99e79..d71ba767734 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -82,17 +82,10 @@ static int icmp_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count)) - nf_ct_kill_acct(ct, ctinfo, skb); - } else { - atomic_inc(&ct->proto.icmp.count); - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); - } + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); return NF_ACCEPT; } @@ -116,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); return false; } - atomic_set(&ct->proto.icmp.count, 0); return true; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a0acd9655fe..642dcb127ba 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -95,17 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count)) - nf_ct_kill_acct(ct, ctinfo, skb); - } else { - atomic_inc(&ct->proto.icmp.count); - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); - } + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); return NF_ACCEPT; } @@ -131,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); return false; } - atomic_set(&ct->proto.icmp.count, 0); return true; } -- cgit v1.2.3-70-g09d2 From 11eeef41d5f63c7d2f7fdfcc733eb7fb137cc384 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 8 Jun 2009 17:01:51 +0200 Subject: netfilter: passive OS fingerprint xtables match Passive OS fingerprinting netfilter module allows to passively detect remote OS and perform various netfilter actions based on that knowledge. This module compares some data (WS, MSS, options and it's order, ttl, df and others) from packets with SYN bit set with dynamically loaded OS fingerprints. Fingerprint matching rules can be downloaded from OpenBSD source tree or found in archive and loaded via netfilter netlink subsystem into the kernel via special util found in archive. Archive contains library file (also attached), which was shipped with iptables extensions some time ago (at least when ipt_osf existed in patch-o-matic). Following changes were made in this release: * added NLM_F_CREATE/NLM_F_EXCL checks * dropped _rcu list traversing helpers in the protected add/remove calls * dropped unneded structures, debug prints, obscure comment and check Fingerprints can be downloaded from http://www.openbsd.org/cgi-bin/cvsweb/src/etc/pf.os or can be found in archive Example usage: -d switch removes fingerprints Please consider for inclusion. Thank you. Passive OS fingerprint homepage (archives, examples): http://www.ioremap.net/projects/osf Signed-off-by: Evgeniy Polyakov Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/xt_osf.h | 133 +++++++++++ net/netfilter/Kconfig | 13 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_osf.c | 428 ++++++++++++++++++++++++++++++++++++ 6 files changed, 578 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/xt_osf.h create mode 100644 net/netfilter/xt_osf.c (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index af9d2fb9721..2aea50399c0 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -33,6 +33,7 @@ header-y += xt_limit.h header-y += xt_mac.h header-y += xt_mark.h header-y += xt_multiport.h +header-y += xt_osf.h header-y += xt_owner.h header-y += xt_pkttype.h header-y += xt_quota.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 2214e516146..bff4d5741d9 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -46,7 +46,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTNETLINK_EXP 2 #define NFNL_SUBSYS_QUEUE 3 #define NFNL_SUBSYS_ULOG 4 -#define NFNL_SUBSYS_COUNT 5 +#define NFNL_SUBSYS_OSF 5 +#define NFNL_SUBSYS_COUNT 6 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h new file mode 100644 index 00000000000..fd2272e0959 --- /dev/null +++ b/include/linux/netfilter/xt_osf.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XT_OSF_H +#define _XT_OSF_H + +#define MAXGENRELEN 32 + +#define XT_OSF_GENRE (1<<0) +#define XT_OSF_TTL (1<<1) +#define XT_OSF_LOG (1<<2) +#define XT_OSF_INVERT (1<<3) + +#define XT_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */ +#define XT_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */ +#define XT_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */ + +#define XT_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */ +#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */ +#define XT_OSF_TTL_NOCHECK 2 /* Do not compare ip and fingerprint TTL at all */ + +struct xt_osf_info { + char genre[MAXGENRELEN]; + __u32 len; + __u32 flags; + __u32 loglevel; + __u32 ttl; +}; + +/* + * Wildcard MSS (kind of). + * It is used to implement a state machine for the different wildcard values + * of the MSS and window sizes. + */ +struct xt_osf_wc { + __u32 wc; + __u32 val; +}; + +/* + * This struct represents IANA options + * http://www.iana.org/assignments/tcp-parameters + */ +struct xt_osf_opt { + __u16 kind, length; + struct xt_osf_wc wc; +}; + +struct xt_osf_user_finger { + struct xt_osf_wc wss; + + __u8 ttl, df; + __u16 ss, mss; + __u16 opt_num; + + char genre[MAXGENRELEN]; + char version[MAXGENRELEN]; + char subtype[MAXGENRELEN]; + + /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */ + struct xt_osf_opt opt[MAX_IPOPTLEN]; +}; + +struct xt_osf_nlmsg { + struct xt_osf_user_finger f; + struct iphdr ip; + struct tcphdr tcp; +}; + +/* Defines for IANA option kinds */ + +enum iana_options { + OSFOPT_EOL = 0, /* End of options */ + OSFOPT_NOP, /* NOP */ + OSFOPT_MSS, /* Maximum segment size */ + OSFOPT_WSO, /* Window scale option */ + OSFOPT_SACKP, /* SACK permitted */ + OSFOPT_SACK, /* SACK */ + OSFOPT_ECHO, + OSFOPT_ECHOREPLY, + OSFOPT_TS, /* Timestamp option */ + OSFOPT_POCP, /* Partial Order Connection Permitted */ + OSFOPT_POSP, /* Partial Order Service Profile */ + + /* Others are not used in the current OSF */ + OSFOPT_EMPTY = 255, +}; + +/* + * Initial window size option state machine: multiple of mss, mtu or + * plain numeric value. Can also be made as plain numeric value which + * is not a multiple of specified value. + */ +enum xt_osf_window_size_options { + OSF_WSS_PLAIN = 0, + OSF_WSS_MSS, + OSF_WSS_MTU, + OSF_WSS_MODULO, + OSF_WSS_MAX, +}; + +/* + * Add/remove fingerprint from the kernel. + */ +enum xt_osf_msg_types { + OSF_MSG_ADD, + OSF_MSG_REMOVE, + OSF_MSG_MAX, +}; + +enum xt_osf_attr_type { + OSF_ATTR_UNSPEC, + OSF_ATTR_FINGER, + OSF_ATTR_MAX, +}; + +#endif /* _XT_OSF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index cb3ad741ebf..79ba47f042c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. +config NETFILTER_XT_MATCH_OSF + tristate '"osf" Passive OS fingerprint match' + depends on NETFILTER_ADVANCED && NETFILTER_NETLINK + help + This option selects the Passive OS Fingerprinting match module + that allows to passively match the remote operating system by + analyzing incoming TCP SYN packets. + + Rules and loading software can be downloaded from + http://www.ioremap.net/projects/osf + + To compile it as a module, choose M here. If unsure, say N. + endif # NETFILTER_XTABLES endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6282060fbda..49f62ee4e9f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c new file mode 100644 index 00000000000..863e40977a4 --- /dev/null +++ b/net/netfilter/xt_osf.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +struct xt_osf_finger { + struct rcu_head rcu_head; + struct list_head finger_entry; + struct xt_osf_user_finger finger; +}; + +enum osf_fmatch_states { + /* Packet does not match the fingerprint */ + FMATCH_WRONG = 0, + /* Packet matches the fingerprint */ + FMATCH_OK, + /* Options do not match the fingerprint, but header does */ + FMATCH_OPT_WRONG, +}; + +/* + * Indexed by dont-fragment bit. + * It is the only constant value in the fingerprint. + */ +static struct list_head xt_osf_fingers[2]; + +static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { + [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, +}; + +static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) +{ + struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head); + + kfree(f); +} + +static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *kf = NULL, *sf; + int err = 0; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL); + if (!kf) + return -ENOMEM; + + memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger)); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + kfree(kf); + kf = NULL; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + err = -EEXIST; + break; + } + + /* + * We are protected by nfnl mutex. + */ + if (kf) + list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]); + + return err; +} + +static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *sf; + int err = ENOENT; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + /* + * We are protected by nfnl mutex. + */ + list_del_rcu(&sf->finger_entry); + call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); + + err = 0; + break; + } + + return err; +} + +static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = { + [OSF_MSG_ADD] = { + .call = xt_osf_add_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, + [OSF_MSG_REMOVE] = { + .call = xt_osf_remove_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, +}; + +static const struct nfnetlink_subsystem xt_osf_nfnetlink = { + .name = "osf", + .subsys_id = NFNL_SUBSYS_OSF, + .cb_count = OSF_MSG_MAX, + .cb = xt_osf_nfnetlink_callbacks, +}; + +static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info, + unsigned char f_ttl) +{ + const struct iphdr *ip = ip_hdr(skb); + + if (info->flags & XT_OSF_TTL) { + if (info->ttl == XT_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (info->ttl == XT_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + else { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + int ret = 0; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; + } + } + endfor_ifa(in_dev); + + return ret; + } + } + + return ip->ttl == f_ttl; +} + +static bool xt_osf_match_packet(const struct sk_buff *skb, + const struct xt_match_param *p) +{ + const struct xt_osf_info *info = p->matchinfo; + const struct iphdr *ip = ip_hdr(skb); + const struct tcphdr *tcp; + struct tcphdr _tcph; + int fmatch = FMATCH_WRONG, fcount = 0; + unsigned int optsize = 0, check_WSS = 0; + u16 window, totlen, mss = 0; + bool df; + const unsigned char *optp = NULL, *_optp = NULL; + unsigned char opts[MAX_IPOPTLEN]; + const struct xt_osf_finger *kf; + const struct xt_osf_user_finger *f; + + if (!info) + return false; + + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + if (!tcp) + return false; + + if (!tcp->syn) + return false; + + totlen = ntohs(ip->tot_len); + df = ntohs(ip->frag_off) & IP_DF; + window = ntohs(tcp->window); + + if (tcp->doff * 4 > sizeof(struct tcphdr)) { + optsize = tcp->doff * 4 - sizeof(struct tcphdr); + + _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + + sizeof(struct tcphdr), optsize, opts); + } + + rcu_read_lock(); + list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + f = &kf->finger; + + if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) + continue; + + optp = _optp; + fmatch = FMATCH_WRONG; + + if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { + int foptsize, optnum; + + /* + * Should not happen if userspace parser was written correctly. + */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; + + /* Check options */ + + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; + + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; + + check_WSS = f->wss.wc; + + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; + + fmatch = FMATCH_OK; + + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; + + mss = ntohs(mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } + + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; + + if (fmatch != FMATCH_OK) + break; + } + + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; + + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ +#define SMART_MSS_1 1460 +#define SMART_MSS_2 1448 + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; + } + } + + if (fmatch != FMATCH_OK) + continue; + + fcount++; + + if (info->flags & XT_OSF_LOG) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); + + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; + } + } + rcu_read_unlock(); + + if (!fcount && (info->flags & XT_OSF_LOG)) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest)); + + if (fcount) + fmatch = FMATCH_OK; + + return fmatch == FMATCH_OK; +} + +static struct xt_match xt_osf_match = { + .name = "osf", + .revision = 0, + .family = NFPROTO_IPV4, + .proto = IPPROTO_TCP, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD), + .match = xt_osf_match_packet, + .matchsize = sizeof(struct xt_osf_info), + .me = THIS_MODULE, +}; + +static int __init xt_osf_init(void) +{ + int err = -EINVAL; + int i; + + for (i=0; ifinger_entry); + call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); + } + } + rcu_read_unlock(); + + rcu_barrier(); +} + +module_init(xt_osf_init); +module_exit(xt_osf_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_DESCRIPTION("Passive OS fingerprint matching."); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); -- cgit v1.2.3-70-g09d2 From 226c7ffe74474257b4b87bd38ae8ba0030cf65e2 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 6 May 2009 10:52:51 -0700 Subject: [SCSI] net, libfcoe: Add the FCoE Initialization Protocol ethertype FIP is the FCoE Initialization Protocol and this patch adds the protocol ethertype to the kernel's list of ethertypes. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Acked-by: David S. Miller Signed-off-by: James Bottomley --- include/linux/if_ether.h | 1 + include/scsi/fc/fc_fip.h | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index cfe4fe1b713..60e8934d10b 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -79,6 +79,7 @@ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ +#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h index 0627a9ae634..3d138c1fcf8 100644 --- a/include/scsi/fc/fc_fip.h +++ b/include/scsi/fc/fc_fip.h @@ -22,13 +22,6 @@ * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf */ -/* - * The FIP ethertype eventually goes in net/if_ether.h. - */ -#ifndef ETH_P_FIP -#define ETH_P_FIP 0x8914 /* FIP Ethertype */ -#endif - #define FIP_DEF_PRI 128 /* default selection priority */ #define FIP_DEF_FC_MAP 0x0efc00 /* default FCoE MAP (MAC OUI) value */ #define FIP_DEF_FKA 8000 /* default FCF keep-alive/advert period (mS) */ -- cgit v1.2.3-70-g09d2 From c6f4a42de60b981dd210de01cd3e575835e3158e Mon Sep 17 00:00:00 2001 From: Minkyu Kang Date: Fri, 5 Jun 2009 15:33:04 +0900 Subject: Add MAX17040 Fuel Gauge driver The MAX17040 is a I2C interfaced Fuel Gauge systems for lithium-ion batteries This patch adds support the MAX17040 Fuel Gauge Signed-off-by: Minkyu Kang Signed-off-by: Anton Vorontsov --- drivers/power/Kconfig | 8 + drivers/power/Makefile | 3 +- drivers/power/max17040_battery.c | 309 +++++++++++++++++++++++++++++++++++++++ include/linux/max17040_battery.h | 19 +++ 4 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 drivers/power/max17040_battery.c create mode 100644 include/linux/max17040_battery.h (limited to 'include') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 33da1127992..7eda34838bf 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -82,6 +82,14 @@ config BATTERY_DA9030 Say Y here to enable support for batteries charger integrated into DA9030 PMIC. +config BATTERY_MAX17040 + tristate "Maxim MAX17040 Fuel Gauge" + depends on I2C + help + MAX17040 is fuel-gauge systems for lithium-ion (Li+) batteries + in handheld and portable equipment. The MAX17040 is configured + to operate with a single lithium cell + config CHARGER_PCF50633 tristate "NXP PCF50633 MBC" depends on MFD_PCF50633 diff --git a/drivers/power/Makefile b/drivers/power/Makefile index 2fcf41d13e5..daf3179689a 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -25,4 +25,5 @@ obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o -obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o \ No newline at end of file +obj-$(CONFIG_BATTERY_MAX17040) += max17040_battery.o +obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c new file mode 100644 index 00000000000..87b98bf27ae --- /dev/null +++ b/drivers/power/max17040_battery.c @@ -0,0 +1,309 @@ +/* + * max17040_battery.c + * fuel-gauge systems for lithium-ion (Li+) batteries + * + * Copyright (C) 2009 Samsung Electronics + * Minkyu Kang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX17040_VCELL_MSB 0x02 +#define MAX17040_VCELL_LSB 0x03 +#define MAX17040_SOC_MSB 0x04 +#define MAX17040_SOC_LSB 0x05 +#define MAX17040_MODE_MSB 0x06 +#define MAX17040_MODE_LSB 0x07 +#define MAX17040_VER_MSB 0x08 +#define MAX17040_VER_LSB 0x09 +#define MAX17040_RCOMP_MSB 0x0C +#define MAX17040_RCOMP_LSB 0x0D +#define MAX17040_CMD_MSB 0xFE +#define MAX17040_CMD_LSB 0xFF + +#define MAX17040_DELAY 1000 +#define MAX17040_BATTERY_FULL 95 + +struct max17040_chip { + struct i2c_client *client; + struct delayed_work work; + struct power_supply battery; + struct max17040_platform_data *pdata; + + /* State Of Connect */ + int online; + /* battery voltage */ + int vcell; + /* battery capacity */ + int soc; + /* State Of Charge */ + int status; +}; + +static int max17040_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct max17040_chip *chip = container_of(psy, + struct max17040_chip, battery); + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = chip->status; + break; + case POWER_SUPPLY_PROP_ONLINE: + val->intval = chip->online; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + val->intval = chip->vcell; + break; + case POWER_SUPPLY_PROP_CAPACITY: + val->intval = chip->soc; + break; + default: + return -EINVAL; + } + return 0; +} + +static int max17040_write_reg(struct i2c_client *client, int reg, u8 value) +{ + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, value); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int max17040_read_reg(struct i2c_client *client, int reg) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static void max17040_reset(struct i2c_client *client) +{ + max17040_write_reg(client, MAX17040_CMD_MSB, 0x54); + max17040_write_reg(client, MAX17040_CMD_LSB, 0x00); +} + +static void max17040_get_vcell(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + u8 msb; + u8 lsb; + + msb = max17040_read_reg(client, MAX17040_VCELL_MSB); + lsb = max17040_read_reg(client, MAX17040_VCELL_LSB); + + chip->vcell = (msb << 4) + (lsb >> 4); +} + +static void max17040_get_soc(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + u8 msb; + u8 lsb; + + msb = max17040_read_reg(client, MAX17040_SOC_MSB); + lsb = max17040_read_reg(client, MAX17040_SOC_LSB); + + chip->soc = msb; +} + +static void max17040_get_version(struct i2c_client *client) +{ + u8 msb; + u8 lsb; + + msb = max17040_read_reg(client, MAX17040_VER_MSB); + lsb = max17040_read_reg(client, MAX17040_VER_LSB); + + dev_info(&client->dev, "MAX17040 Fuel-Gauge Ver %d%d\n", msb, lsb); +} + +static void max17040_get_online(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + + if (chip->pdata->battery_online) + chip->online = chip->pdata->battery_online(); + else + chip->online = 1; +} + +static void max17040_get_status(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + + if (!chip->pdata->charger_online || !chip->pdata->charger_enable) { + chip->status = POWER_SUPPLY_STATUS_UNKNOWN; + return; + } + + if (chip->pdata->charger_online()) { + if (chip->pdata->charger_enable()) + chip->status = POWER_SUPPLY_STATUS_CHARGING; + else + chip->status = POWER_SUPPLY_STATUS_NOT_CHARGING; + } else { + chip->status = POWER_SUPPLY_STATUS_DISCHARGING; + } + + if (chip->soc > MAX17040_BATTERY_FULL) + chip->status = POWER_SUPPLY_STATUS_FULL; +} + +static void max17040_work(struct work_struct *work) +{ + struct max17040_chip *chip; + + chip = container_of(work, struct max17040_chip, work.work); + + max17040_get_vcell(chip->client); + max17040_get_soc(chip->client); + max17040_get_online(chip->client); + max17040_get_status(chip->client); + + schedule_delayed_work(&chip->work, MAX17040_DELAY); +} + +static enum power_supply_property max17040_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CAPACITY, +}; + +static int __devinit max17040_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct max17040_chip *chip; + int ret; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->client = client; + chip->pdata = client->dev.platform_data; + + i2c_set_clientdata(client, chip); + + chip->battery.name = "battery"; + chip->battery.type = POWER_SUPPLY_TYPE_BATTERY; + chip->battery.get_property = max17040_get_property; + chip->battery.properties = max17040_battery_props; + chip->battery.num_properties = ARRAY_SIZE(max17040_battery_props); + + ret = power_supply_register(&client->dev, &chip->battery); + if (ret) { + dev_err(&client->dev, "failed: power supply register\n"); + i2c_set_clientdata(client, NULL); + kfree(chip); + return ret; + } + + max17040_reset(client); + max17040_get_version(client); + + INIT_DELAYED_WORK_DEFERRABLE(&chip->work, max17040_work); + schedule_delayed_work(&chip->work, MAX17040_DELAY); + + return 0; +} + +static int __devexit max17040_remove(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + + power_supply_unregister(&chip->battery); + cancel_delayed_work(&chip->work); + i2c_set_clientdata(client, NULL); + kfree(chip); + return 0; +} + +#ifdef CONFIG_PM + +static int max17040_suspend(struct i2c_client *client, + pm_message_t state) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + + cancel_delayed_work(&chip->work); + return 0; +} + +static int max17040_resume(struct i2c_client *client) +{ + struct max17040_chip *chip = i2c_get_clientdata(client); + + schedule_delayed_work(&chip->work, MAX17040_DELAY); + return 0; +} + +#else + +#define max17040_suspend NULL +#define max17040_resume NULL + +#endif /* CONFIG_PM */ + +static const struct i2c_device_id max17040_id[] = { + { "max17040", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max17040_id); + +static struct i2c_driver max17040_i2c_driver = { + .driver = { + .name = "max17040", + }, + .probe = max17040_probe, + .remove = __devexit_p(max17040_remove), + .suspend = max17040_suspend, + .resume = max17040_resume, + .id_table = max17040_id, +}; + +static int __init max17040_init(void) +{ + return i2c_add_driver(&max17040_i2c_driver); +} +module_init(max17040_init); + +static void __exit max17040_exit(void) +{ + i2c_del_driver(&max17040_i2c_driver); +} +module_exit(max17040_exit); + +MODULE_AUTHOR("Minkyu Kang "); +MODULE_DESCRIPTION("MAX17040 Fuel Gauge"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/max17040_battery.h b/include/linux/max17040_battery.h new file mode 100644 index 00000000000..ad97b06cf93 --- /dev/null +++ b/include/linux/max17040_battery.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2009 Samsung Electronics + * Minkyu Kang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAX17040_BATTERY_H_ +#define __MAX17040_BATTERY_H_ + +struct max17040_platform_data { + int (*battery_online)(void); + int (*charger_online)(void); + int (*charger_enable)(void); +}; + +#endif -- cgit v1.2.3-70-g09d2 From 1f8a6a10fb9437eac3f516ea4324a19087872f30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Jun 2009 18:18:39 +0200 Subject: ring-buffer: pass in lockdep class key for reader_lock On Sun, 7 Jun 2009, Ingo Molnar wrote: > Testing tracer sched_switch: <6>Starting ring buffer hammer > PASSED > Testing tracer sysprof: PASSED > Testing tracer function: PASSED > Testing tracer irqsoff: > ============================================= > PASSED > Testing tracer preemptoff: PASSED > Testing tracer preemptirqsoff: [ INFO: possible recursive locking detected ] > PASSED > Testing tracer branch: 2.6.30-rc8-tip-01972-ge5b9078-dirty #5760 > --------------------------------------------- > rb_consumer/431 is trying to acquire lock: > (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_reset_cpu+0x37/0x70 > > but task is already holding lock: > (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_consume+0x7e/0xc0 > > other info that might help us debug this: > 1 lock held by rb_consumer/431: > #0: (&cpu_buffer->reader_lock){......}, at: [] ring_buffer_consume+0x7e/0xc0 The ring buffer is a generic structure, and can be used outside of ftrace. If ftrace traces within the use of the ring buffer, it can produce false positives with lockdep. This patch passes in a static lock key into the allocation of the ring buffer, so that different ring buffers will have their own lock class. Reported-by: Ingo Molnar Signed-off-by: Peter Zijlstra LKML-Reference: <1244477919.13761.9042.camel@twins> [ store key in ring buffer descriptor ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 14 +++++++++++++- kernel/trace/ring_buffer.c | 9 +++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f1345828c7c..8670f1575fe 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -105,7 +105,19 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, * size is in bytes for each per CPU buffer. */ struct ring_buffer * -ring_buffer_alloc(unsigned long size, unsigned flags); +__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); + +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc(size, flags) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc((size), (flags), &__key); \ +}) + void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7102d7a2fad..22878b0d370 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -426,6 +426,8 @@ struct ring_buffer { atomic_t record_disabled; cpumask_var_t cpumask; + struct lock_class_key *reader_lock_key; + struct mutex mutex; struct ring_buffer_per_cpu **buffers; @@ -565,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); + lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); @@ -635,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self, * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ -struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) +struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, + struct lock_class_key *key) { struct ring_buffer *buffer; int bsize; @@ -658,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); buffer->flags = flags; buffer->clock = trace_clock_local; + buffer->reader_lock_key = key; /* need at least two pages */ if (buffer->pages == 1) @@ -715,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) kfree(buffer); return NULL; } -EXPORT_SYMBOL_GPL(ring_buffer_alloc); +EXPORT_SYMBOL_GPL(__ring_buffer_alloc); /** * ring_buffer_free - free a ring buffer. -- cgit v1.2.3-70-g09d2 From 05f77f85f47e30a53f7971b687f3b0250e42f665 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 8 Jun 2009 16:16:56 -0700 Subject: bluetooth: Kill skb_frags_no(), unused. Furthermore, it twiddles with the details of SKB list handling directly, which we're trying to eliminate. Signed-off-by: David S. Miller --- include/net/bluetooth/bluetooth.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3ad5390a4dd..144d1d5dd82 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -171,15 +171,6 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l return skb; } -static inline int skb_frags_no(struct sk_buff *skb) -{ - register struct sk_buff *frag = skb_shinfo(skb)->frag_list; - register int n = 1; - - for (; frag; frag=frag->next, n++); - return n; -} - int bt_err(__u16 code); extern int hci_sock_init(void); -- cgit v1.2.3-70-g09d2 From 9df1bb9b516daeece159ab7fb262d01a0359247c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jun 2009 06:22:57 +0200 Subject: Revert "block: Fix bounce limit setting in DM" This reverts commit a05c0205ba031c01bba33a21bf0a35920eb64833. DM doesn't need to access the bounce_pfn directly. Signed-off-by: Jens Axboe --- block/blk-settings.c | 17 ----------------- drivers/md/dm-table.c | 2 +- include/linux/blkdev.h | 1 - 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 9acd0b7e802..8d339349289 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -193,23 +193,6 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) } EXPORT_SYMBOL(blk_queue_bounce_limit); -/** - * blk_queue_bounce_pfn - set the bounce buffer limit for queue - * @q: the request queue for the device - * @pfn: max address - * - * Description: - * This function is similar to blk_queue_bounce_limit except it - * neither changes allocation flags, nor does it set up the ISA DMA - * pool. This function should only be used by stacking drivers. - * Hardware drivers should use blk_queue_bounce_limit instead. - */ -void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn) -{ - q->limits.bounce_pfn = pfn; -} -EXPORT_SYMBOL(blk_queue_bounce_pfn); - /** * blk_queue_max_sectors - set max sectors for a request for this queue * @q: the request queue for the device diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3ca1604ddd5..e9a73bb242b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) blk_queue_max_segment_size(q, t->limits.max_segment_size); blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); - blk_queue_bounce_pfn(q, t->limits.bounce_pfn); + blk_queue_bounce_limit(q, t->limits.bounce_pfn); if (t->limits.no_cluster) queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 989aa1790f4..5e740a135e7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,7 +910,6 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_queue_bounce_pfn(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); -- cgit v1.2.3-70-g09d2 From ee0398717078260ee4ffa97d407071bc774e2dac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Jun 2009 00:17:13 -0700 Subject: skbuff: Add frag list abstraction interfaces. With the hope that these can be used to eliminate direct references to the frag list implementation. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aad484cd586..f1c93b878b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1077,7 +1077,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1716,6 +1716,25 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) +static inline bool skb_has_frags(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->frag_list != NULL; +} + +static inline void skb_frag_list_init(struct sk_buff *skb) +{ + skb_shinfo(skb)->frag_list = NULL; +} + +static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) +{ + frag->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = frag; +} + +#define skb_walk_frags(skb, iter) \ + for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) + extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *err); extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, -- cgit v1.2.3-70-g09d2 From a5bd8a13e9e0322c7f76b34790ba34e2e0ce2ac5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Jun 2009 00:17:27 -0700 Subject: netdevice.h: Use frag list abstraction interfaces. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a801380b50..9ea8d6dfe54 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1904,7 +1904,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_shinfo(skb)->frag_list || (features & NETIF_F_FRAGLIST)); + (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 151060ac13144208bd7601d17e4c92c59b98072f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Apr 2009 10:54:54 +0900 Subject: CUSE: implement CUSE - Character device in Userspace CUSE enables implementing character devices in userspace. With recent additions of ioctl and poll support, FUSE already has most of what's necessary to implement character devices. All CUSE has to do is bonding all those components - FUSE, chardev and the driver model - nicely. When client opens /dev/cuse, kernel starts conversation with CUSE_INIT. The client tells CUSE which device it wants to create. As the previous patch made fuse_file usable without associated fuse_inode, CUSE doesn't create super block or inodes. It attaches fuse_file to cdev file->private_data during open and set ff->fi to NULL. The rest of the operation is almost identical to FUSE direct IO case. Each CUSE device has a corresponding directory /sys/class/cuse/DEVNAME (which is symlink to /sys/devices/virtual/class/DEVNAME if SYSFS_DEPRECATED is turned off) which hosts "waiting" and "abort" among other things. Those two files have the same meaning as the FUSE control files. The only notable lacking feature compared to in-kernel implementation is mmap support. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi --- fs/Kconfig | 10 + fs/fuse/Makefile | 1 + fs/fuse/cuse.c | 610 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fuse.h | 31 +++ 4 files changed, 652 insertions(+) create mode 100644 fs/fuse/cuse.c (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index 9f7270f36b2..525da2e8f73 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -62,6 +62,16 @@ source "fs/autofs/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" +config CUSE + tristate "Character device in Userpace support" + depends on FUSE_FS + help + This FUSE extension allows character devices to be + implemented in userspace. + + If you want to develop or use userspace character device + based on CUSE, answer Y or M. + config GENERIC_ACL bool select FS_POSIX_ACL diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 72437065f6a..e95eeb445e5 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_FUSE_FS) += fuse.o +obj-$(CONFIG_CUSE) += cuse.o fuse-objs := dev.o dir.o file.o inode.o control.o diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c new file mode 100644 index 00000000000..de792dcf327 --- /dev/null +++ b/fs/fuse/cuse.c @@ -0,0 +1,610 @@ +/* + * CUSE: Character device in Userspace + * + * Copyright (C) 2008-2009 SUSE Linux Products GmbH + * Copyright (C) 2008-2009 Tejun Heo + * + * This file is released under the GPLv2. + * + * CUSE enables character devices to be implemented from userland much + * like FUSE allows filesystems. On initialization /dev/cuse is + * created. By opening the file and replying to the CUSE_INIT request + * userland CUSE server can create a character device. After that the + * operation is very similar to FUSE. + * + * A CUSE instance involves the following objects. + * + * cuse_conn : contains fuse_conn and serves as bonding structure + * channel : file handle connected to the userland CUSE server + * cdev : the implemented character device + * dev : generic device for cdev + * + * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with + * devices, it's called 'channel' to reduce confusion. + * + * channel determines when the character device dies. When channel is + * closed, everything begins to destruct. The cuse_conn is taken off + * the lookup table preventing further access from cdev, cdev and + * generic device are removed and the base reference of cuse_conn is + * put. + * + * On each open, the matching cuse_conn is looked up and if found an + * additional reference is taken which is released when the file is + * closed. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fuse_i.h" + +#define CUSE_CONNTBL_LEN 64 + +struct cuse_conn { + struct list_head list; /* linked on cuse_conntbl */ + struct fuse_conn fc; /* fuse connection */ + struct cdev *cdev; /* associated character device */ + struct device *dev; /* device representing @cdev */ + + /* init parameters, set once during initialization */ + bool unrestricted_ioctl; +}; + +static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ +static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; +static struct class *cuse_class; + +static struct cuse_conn *fc_to_cc(struct fuse_conn *fc) +{ + return container_of(fc, struct cuse_conn, fc); +} + +static struct list_head *cuse_conntbl_head(dev_t devt) +{ + return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN]; +} + + +/************************************************************************** + * CUSE frontend operations + * + * These are file operations for the character device. + * + * On open, CUSE opens a file from the FUSE mnt and stores it to + * private_data of the open file. All other ops call FUSE ops on the + * FUSE file. + */ + +static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + loff_t pos = 0; + + return fuse_direct_io(file, buf, count, &pos, 0); +} + +static ssize_t cuse_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos = 0; + /* + * No locking or generic_write_checks(), the server is + * responsible for locking and sanity checks. + */ + return fuse_direct_io(file, buf, count, &pos, 1); +} + +static int cuse_open(struct inode *inode, struct file *file) +{ + dev_t devt = inode->i_cdev->dev; + struct cuse_conn *cc = NULL, *pos; + int rc; + + /* look up and get the connection */ + spin_lock(&cuse_lock); + list_for_each_entry(pos, cuse_conntbl_head(devt), list) + if (pos->dev->devt == devt) { + fuse_conn_get(&pos->fc); + cc = pos; + break; + } + spin_unlock(&cuse_lock); + + /* dead? */ + if (!cc) + return -ENODEV; + + /* + * Generic permission check is already done against the chrdev + * file, proceed to open. + */ + rc = fuse_do_open(&cc->fc, 0, file, 0); + if (rc) + fuse_conn_put(&cc->fc); + return rc; +} + +static int cuse_release(struct inode *inode, struct file *file) +{ + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + + fuse_sync_release(ff, file->f_flags); + fuse_conn_put(fc); + + return 0; +} + +static long cuse_file_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = 0; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = FUSE_IOCTL_COMPAT; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static const struct file_operations cuse_frontend_fops = { + .owner = THIS_MODULE, + .read = cuse_read, + .write = cuse_write, + .open = cuse_open, + .release = cuse_release, + .unlocked_ioctl = cuse_file_ioctl, + .compat_ioctl = cuse_file_compat_ioctl, + .poll = fuse_file_poll, +}; + + +/************************************************************************** + * CUSE channel initialization and destruction + */ + +struct cuse_devinfo { + const char *name; +}; + +/** + * cuse_parse_one - parse one key=value pair + * @pp: i/o parameter for the current position + * @end: points to one past the end of the packed string + * @keyp: out parameter for key + * @valp: out parameter for value + * + * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends + * at @end - 1. This function parses one pair and set *@keyp to the + * start of the key and *@valp to the start of the value. Note that + * the original string is modified such that the key string is + * terminated with '\0'. *@pp is updated to point to the next string. + * + * RETURNS: + * 1 on successful parse, 0 on EOF, -errno on failure. + */ +static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) +{ + char *p = *pp; + char *key, *val; + + while (p < end && *p == '\0') + p++; + if (p == end) + return 0; + + if (end[-1] != '\0') { + printk(KERN_ERR "CUSE: info not properly terminated\n"); + return -EINVAL; + } + + key = val = p; + p += strlen(p); + + if (valp) { + strsep(&val, "="); + if (!val) + val = key + strlen(key); + key = strstrip(key); + val = strstrip(val); + } else + key = strstrip(key); + + if (!strlen(key)) { + printk(KERN_ERR "CUSE: zero length info key specified\n"); + return -EINVAL; + } + + *pp = p; + *keyp = key; + if (valp) + *valp = val; + + return 1; +} + +/** + * cuse_parse_dev_info - parse device info + * @p: device info string + * @len: length of device info string + * @devinfo: out parameter for parsed device info + * + * Parse @p to extract device info and store it into @devinfo. String + * pointed to by @p is modified by parsing and @devinfo points into + * them, so @p shouldn't be freed while @devinfo is in use. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) +{ + char *end = p + len; + char *key, *val; + int rc; + + while (true) { + rc = cuse_parse_one(&p, end, &key, &val); + if (rc < 0) + return rc; + if (!rc) + break; + if (strcmp(key, "DEVNAME") == 0) + devinfo->name = val; + else + printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", + key); + } + + if (!devinfo->name || !strlen(devinfo->name)) { + printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); + return -EINVAL; + } + + return 0; +} + +static void cuse_gendev_release(struct device *dev) +{ + kfree(dev); +} + +/** + * cuse_process_init_reply - finish initializing CUSE channel + * + * This function creates the character device and sets up all the + * required data structures for it. Please read the comment at the + * top of this file for high level overview. + */ +static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + struct cuse_conn *cc = fc_to_cc(fc); + struct cuse_init_out *arg = &req->misc.cuse_init_out; + struct page *page = req->pages[0]; + struct cuse_devinfo devinfo = { }; + struct device *dev; + struct cdev *cdev; + dev_t devt; + int rc; + + if (req->out.h.error || + arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { + goto err; + } + + fc->minor = arg->minor; + fc->max_read = max_t(unsigned, arg->max_read, 4096); + fc->max_write = max_t(unsigned, arg->max_write, 4096); + + /* parse init reply */ + cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; + + rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, + &devinfo); + if (rc) + goto err; + + /* determine and reserve devt */ + devt = MKDEV(arg->dev_major, arg->dev_minor); + if (!MAJOR(devt)) + rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name); + else + rc = register_chrdev_region(devt, 1, devinfo.name); + if (rc) { + printk(KERN_ERR "CUSE: failed to register chrdev region\n"); + goto err; + } + + /* devt determined, create device */ + rc = -ENOMEM; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + goto err_region; + + device_initialize(dev); + dev_set_uevent_suppress(dev, 1); + dev->class = cuse_class; + dev->devt = devt; + dev->release = cuse_gendev_release; + dev_set_drvdata(dev, cc); + dev_set_name(dev, "%s", devinfo.name); + + rc = device_add(dev); + if (rc) + goto err_device; + + /* register cdev */ + rc = -ENOMEM; + cdev = cdev_alloc(); + if (!cdev) + goto err_device; + + cdev->owner = THIS_MODULE; + cdev->ops = &cuse_frontend_fops; + + rc = cdev_add(cdev, devt, 1); + if (rc) + goto err_cdev; + + cc->dev = dev; + cc->cdev = cdev; + + /* make the device available */ + spin_lock(&cuse_lock); + list_add(&cc->list, cuse_conntbl_head(devt)); + spin_unlock(&cuse_lock); + + /* announce device availability */ + dev_set_uevent_suppress(dev, 0); + kobject_uevent(&dev->kobj, KOBJ_ADD); +out: + __free_page(page); + return; + +err_cdev: + cdev_del(cdev); +err_device: + put_device(dev); +err_region: + unregister_chrdev_region(devt, 1); +err: + fc->conn_error = 1; + goto out; +} + +static int cuse_send_init(struct cuse_conn *cc) +{ + int rc; + struct fuse_req *req; + struct page *page; + struct fuse_conn *fc = &cc->fc; + struct cuse_init_in *arg; + + BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + rc = PTR_ERR(req); + goto err; + } + + rc = -ENOMEM; + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto err_put_req; + + arg = &req->misc.cuse_init_in; + arg->major = FUSE_KERNEL_VERSION; + arg->minor = FUSE_KERNEL_MINOR_VERSION; + arg->flags |= CUSE_UNRESTRICTED_IOCTL; + req->in.h.opcode = CUSE_INIT; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct cuse_init_in); + req->in.args[0].value = arg; + req->out.numargs = 2; + req->out.args[0].size = sizeof(struct cuse_init_out); + req->out.args[0].value = &req->misc.cuse_init_out; + req->out.args[1].size = CUSE_INIT_INFO_MAX; + req->out.argvar = 1; + req->out.argpages = 1; + req->pages[0] = page; + req->num_pages = 1; + req->end = cuse_process_init_reply; + fuse_request_send_background(fc, req); + + return 0; + +err_put_req: + fuse_put_request(fc, req); +err: + return rc; +} + +static void cuse_fc_release(struct fuse_conn *fc) +{ + struct cuse_conn *cc = fc_to_cc(fc); + kfree(cc); +} + +/** + * cuse_channel_open - open method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being opened + * + * Userland CUSE server can create a CUSE device by opening /dev/cuse + * and replying to the initilaization request kernel sends. This + * function is responsible for handling CUSE device initialization. + * Because the fd opened by this function is used during + * initialization, this function only creates cuse_conn and sends + * init. The rest is delegated to a kthread. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_open(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc; + int rc; + + /* set up cuse_conn */ + cc = kzalloc(sizeof(*cc), GFP_KERNEL); + if (!cc) + return -ENOMEM; + + fuse_conn_init(&cc->fc); + + INIT_LIST_HEAD(&cc->list); + cc->fc.release = cuse_fc_release; + + cc->fc.connected = 1; + cc->fc.blocked = 0; + rc = cuse_send_init(cc); + if (rc) { + fuse_conn_put(&cc->fc); + return rc; + } + file->private_data = &cc->fc; /* channel owns base reference to cc */ + + return 0; +} + +/** + * cuse_channel_release - release method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being closed + * + * Disconnect the channel, deregister CUSE device and initiate + * destruction by putting the default reference. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_release(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc = fc_to_cc(file->private_data); + int rc; + + /* remove from the conntbl, no more access from this point on */ + spin_lock(&cuse_lock); + list_del_init(&cc->list); + spin_unlock(&cuse_lock); + + /* remove device */ + if (cc->dev) + device_unregister(cc->dev); + if (cc->cdev) { + unregister_chrdev_region(cc->cdev->dev, 1); + cdev_del(cc->cdev); + } + + /* kill connection and shutdown channel */ + fuse_conn_kill(&cc->fc); + rc = fuse_dev_release(inode, file); /* puts the base reference */ + + return rc; +} + +static struct file_operations cuse_channel_fops; /* initialized during init */ + + +/************************************************************************** + * Misc stuff and module initializatiion + * + * CUSE exports the same set of attributes to sysfs as fusectl. + */ + +static ssize_t cuse_class_waiting_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); +} + +static ssize_t cuse_class_abort_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + fuse_abort_conn(&cc->fc); + return count; +} + +static struct device_attribute cuse_class_dev_attrs[] = { + __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL), + __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store), + { } +}; + +static struct miscdevice cuse_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cuse", + .fops = &cuse_channel_fops, +}; + +static int __init cuse_init(void) +{ + int i, rc; + + /* init conntbl */ + for (i = 0; i < CUSE_CONNTBL_LEN; i++) + INIT_LIST_HEAD(&cuse_conntbl[i]); + + /* inherit and extend fuse_dev_operations */ + cuse_channel_fops = fuse_dev_operations; + cuse_channel_fops.owner = THIS_MODULE; + cuse_channel_fops.open = cuse_channel_open; + cuse_channel_fops.release = cuse_channel_release; + + cuse_class = class_create(THIS_MODULE, "cuse"); + if (IS_ERR(cuse_class)) + return PTR_ERR(cuse_class); + + cuse_class->dev_attrs = cuse_class_dev_attrs; + + rc = misc_register(&cuse_miscdev); + if (rc) { + class_destroy(cuse_class); + return rc; + } + + return 0; +} + +static void __exit cuse_exit(void) +{ + misc_deregister(&cuse_miscdev); + class_destroy(cuse_class); +} + +module_init(cuse_init); +module_exit(cuse_exit); + +MODULE_AUTHOR("Tejun Heo "); +MODULE_DESCRIPTION("Character device in Userspace"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 162e5defe68..d41ed593f79 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -120,6 +120,13 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) +/** + * CUSE INIT request/reply flags + * + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl + */ +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) + /** * Release flags */ @@ -210,6 +217,9 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_IOCTL = 39, FUSE_POLL = 40, + + /* CUSE specific operations */ + CUSE_INIT = 4096, }; enum fuse_notify_code { @@ -401,6 +411,27 @@ struct fuse_init_out { __u32 max_write; }; +#define CUSE_INIT_INFO_MAX 4096 + +struct cuse_init_in { + __u32 major; + __u32 minor; + __u32 unused; + __u32 flags; +}; + +struct cuse_init_out { + __u32 major; + __u32 minor; + __u32 unused; + __u32 flags; + __u32 max_read; + __u32 max_write; + __u32 dev_major; /* chardev major */ + __u32 dev_minor; /* chardev minor */ + __u32 spare[10]; +}; + struct fuse_interrupt_in { __u64 unique; }; -- cgit v1.2.3-70-g09d2 From 728bf09827d350cdaa1f093170e745e8dac49b7a Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 8 Jun 2009 22:05:00 +0000 Subject: pkt_sched: Use PSCHED_SHIFT in PSCHED time conversion Use PSCHED_SHIFT constant instead of '10' in PSCHED_US2NS() and PSCHED_NS2US() macros to enable changing this value later. Additionally use PSCHED_SHIFT in sch_hfsc SM_SHIFT and ISM_SHIFT definitions. This part of the patch is based on feedback from Patrick McHardy . Reported-by: Antonio Almeida Tested-by: Antonio Almeida Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 5 +++-- net/sched/sch_hfsc.c | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index e37fe3129c1..cd0e026d4f6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -42,8 +42,9 @@ typedef u64 psched_time_t; typedef long psched_tdiff_t; /* Avoid doing 64 bit divide by 1000 */ -#define PSCHED_US2NS(x) ((s64)(x) << 10) -#define PSCHED_NS2US(x) ((x) >> 10) +#define PSCHED_SHIFT 10 +#define PSCHED_US2NS(x) ((s64)(x) << PSCHED_SHIFT) +#define PSCHED_NS2US(x) ((x) >> PSCHED_SHIFT) #define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC) #define PSCHED_PASTPERFECT 0 diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5022f9c1f34..362c2811b2d 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -372,7 +372,7 @@ cftree_update(struct hfsc_class *cl) * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * - * The clock source resolution with ktime is 1.024us. + * The clock source resolution with ktime and PSCHED_SHIFT 10 is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective @@ -383,9 +383,11 @@ cftree_update(struct hfsc_class *cl) * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 + * + * So, for PSCHED_SHIFT 10 we need: SM_SHIFT 20, ISM_SHIFT 18. */ -#define SM_SHIFT 20 -#define ISM_SHIFT 18 +#define SM_SHIFT (30 - PSCHED_SHIFT) +#define ISM_SHIFT (8 + PSCHED_SHIFT) #define SM_MASK ((1ULL << SM_SHIFT) - 1) #define ISM_MASK ((1ULL << ISM_SHIFT) - 1) -- cgit v1.2.3-70-g09d2 From a4a710c4a7490587406462bf1d54504b7783d7d7 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 8 Jun 2009 22:05:13 +0000 Subject: pkt_sched: Change PSCHED_SHIFT from 10 to 6 Change PSCHED_SHIFT from 10 to 6 to increase schedulers time resolution. This will increase 16x a number of (internal) ticks per nanosecond, and is needed to improve accuracy of schedulers based on rate tables, like HTB, TBF or CBQ, with rates above 100Mbit. It is assumed this change is safe for 32bit accounting of time diffs up to 2 minutes, which should be enough for common use (extremely low rate values may overflow, so get inaccurate instead). To make full use of this change an updated iproute2 will be needed. (But using older iproute2 should be safe too.) This change breaks ticks - microseconds similarity, so some minor code fixes might be needed. It is also planned to change naming adequately eg. to PSCHED_TICKS2NS() etc. in the near future. Reported-by: Antonio Almeida Tested-by: Antonio Almeida Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd0e026d4f6..120935b2abd 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -41,8 +41,8 @@ static inline void *qdisc_priv(struct Qdisc *q) typedef u64 psched_time_t; typedef long psched_tdiff_t; -/* Avoid doing 64 bit divide by 1000 */ -#define PSCHED_SHIFT 10 +/* Avoid doing 64 bit divide */ +#define PSCHED_SHIFT 6 #define PSCHED_US2NS(x) ((s64)(x) << PSCHED_SHIFT) #define PSCHED_NS2US(x) ((x) >> PSCHED_SHIFT) -- cgit v1.2.3-70-g09d2 From fcb94e422479da52ed90bab230c59617a0462416 Mon Sep 17 00:00:00 2001 From: Sergey Lapin Date: Mon, 8 Jun 2009 12:18:47 +0000 Subject: Add constants for the ieee 802.15.4 stack IEEE 802.15.4 stack requires several constants to be defined/adjusted. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Sergey Lapin Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ include/linux/if_ether.h | 1 + include/linux/socket.h | 4 +++- net/core/dev.c | 6 ++++-- net/core/sock.c | 3 +++ 5 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 5ff89809a58..b554300ef8b 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -86,6 +86,8 @@ #define ARPHRD_IEEE80211 801 /* IEEE 802.11 */ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_IEEE802154 804 +#define ARPHRD_IEEE802154_PHY 805 #define ARPHRD_PHONET 820 /* PhoNet media type */ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index cfe4fe1b713..11a60e4f0a6 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -106,6 +106,7 @@ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ +#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ /* * This is an Ethernet frame header. diff --git a/include/linux/socket.h b/include/linux/socket.h index d2310cb45d2..3b461dffe24 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -194,7 +194,8 @@ struct ucred { #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ #define AF_PHONET 35 /* Phonet sockets */ -#define AF_MAX 36 /* For now.. */ +#define AF_IEEE802154 36 /* IEEE802154 sockets */ +#define AF_MAX 37 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -233,6 +234,7 @@ struct ucred { #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN #define PF_PHONET AF_PHONET +#define PF_IEEE802154 AF_IEEE802154 #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/net/core/dev.c b/net/core/dev.c index 81b392ef511..11560e3258b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -269,7 +269,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -286,7 +287,8 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/core/sock.c b/net/core/sock.c index 58dec9dff99..04e35eb2e73 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { @@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { @@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_MAX" }; -- cgit v1.2.3-70-g09d2 From 9ec7671603573ede31207eb5b0b3e1aa211b2854 Mon Sep 17 00:00:00 2001 From: Sergey Lapin Date: Mon, 8 Jun 2009 12:18:48 +0000 Subject: net: add IEEE 802.15.4 socket family implementation Add support for communication over IEEE 802.15.4 networks. This implementation is neither certified nor complete, but aims to that goal. This commit contains only the socket interface for communication over IEEE 802.15.4 networks. One can either send RAW datagrams or use SOCK_DGRAM to encapsulate data inside normal IEEE 802.15.4 packets. Configuration interface, drivers and software MAC 802.15.4 implementation will follow. Initial implementation was done by Maxim Gorbachyov, Maxim Osipov and Pavel Smolensky as a research project at Siemens AG. Later the stack was heavily reworked to better suit the linux networking model, and is now maitained as an open project partially sponsored by Siemens. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Sergey Lapin Signed-off-by: David S. Miller --- include/net/ieee802154/af_ieee802154.h | 60 +++++ include/net/ieee802154/mac_def.h | 160 +++++++++++++ include/net/ieee802154/netdevice.h | 115 ++++++++++ net/Kconfig | 1 + net/Makefile | 1 + net/ieee802154/Kconfig | 12 + net/ieee802154/Makefile | 4 + net/ieee802154/af802154.h | 36 +++ net/ieee802154/af_ieee802154.c | 372 +++++++++++++++++++++++++++++++ net/ieee802154/dgram.c | 394 +++++++++++++++++++++++++++++++++ net/ieee802154/raw.c | 254 +++++++++++++++++++++ 11 files changed, 1409 insertions(+) create mode 100644 include/net/ieee802154/af_ieee802154.h create mode 100644 include/net/ieee802154/mac_def.h create mode 100644 include/net/ieee802154/netdevice.h create mode 100644 net/ieee802154/Kconfig create mode 100644 net/ieee802154/Makefile create mode 100644 net/ieee802154/af802154.h create mode 100644 net/ieee802154/af_ieee802154.c create mode 100644 net/ieee802154/dgram.c create mode 100644 net/ieee802154/raw.c (limited to 'include') diff --git a/include/net/ieee802154/af_ieee802154.h b/include/net/ieee802154/af_ieee802154.h new file mode 100644 index 00000000000..0d78605fb1a --- /dev/null +++ b/include/net/ieee802154/af_ieee802154.h @@ -0,0 +1,60 @@ +/* + * IEEE 802.15.4 inteface for userspace + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#ifndef _AF_IEEE802154_H +#define _AF_IEEE802154_H + +#include /* for sa_family_t */ + +enum { + IEEE802154_ADDR_NONE = 0x0, + /* RESERVED = 0x01, */ + IEEE802154_ADDR_SHORT = 0x2, /* 16-bit address + PANid */ + IEEE802154_ADDR_LONG = 0x3, /* 64-bit address + PANid */ +}; + +/* address length, octets */ +#define IEEE802154_ADDR_LEN 8 + +struct ieee802154_addr { + int addr_type; + u16 pan_id; + union { + u8 hwaddr[IEEE802154_ADDR_LEN]; + u16 short_addr; + }; +}; + +#define IEEE802154_PANID_BROADCAST 0xffff +#define IEEE802154_ADDR_BROADCAST 0xffff +#define IEEE802154_ADDR_UNDEF 0xfffe + +struct sockaddr_ieee802154 { + sa_family_t family; /* AF_IEEE802154 */ + struct ieee802154_addr addr; +}; + +/* master device */ +#define IEEE802154_SIOC_ADD_SLAVE (SIOCDEVPRIVATE + 0) + +#endif diff --git a/include/net/ieee802154/mac_def.h b/include/net/ieee802154/mac_def.h new file mode 100644 index 00000000000..8cb68463565 --- /dev/null +++ b/include/net/ieee802154/mac_def.h @@ -0,0 +1,160 @@ +/* + * IEEE802.15.4-2003 specification + * + * Copyright (C) 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Pavel Smolenskiy + * Maxim Gorbachyov + * Maxim Osipov + * Dmitry Eremin-Solenikov + */ + +#ifndef IEEE802154_MAC_DEF_H +#define IEEE802154_MAC_DEF_H + +#define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ +#define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ +#define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ +#define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ + +#define IEEE802154_FC_TYPE_SHIFT 0 +#define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) +#define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) +#define IEEE802154_FC_SET_TYPE(v, x) do { \ + v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ + (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ + } while (0) + +#define IEEE802154_FC_SECEN (1 << 3) +#define IEEE802154_FC_FRPEND (1 << 4) +#define IEEE802154_FC_ACK_REQ (1 << 5) +#define IEEE802154_FC_INTRA_PAN (1 << 6) + +#define IEEE802154_FC_SAMODE_SHIFT 14 +#define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) +#define IEEE802154_FC_DAMODE_SHIFT 10 +#define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) + +#define IEEE802154_FC_SAMODE(x) \ + (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) + +#define IEEE802154_FC_DAMODE(x) \ + (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) + + +/* MAC's Command Frames Identifiers */ +#define IEEE802154_CMD_ASSOCIATION_REQ 0x01 +#define IEEE802154_CMD_ASSOCIATION_RESP 0x02 +#define IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 +#define IEEE802154_CMD_DATA_REQ 0x04 +#define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 +#define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 +#define IEEE802154_CMD_BEACON_REQ 0x07 +#define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 +#define IEEE802154_CMD_GTS_REQ 0x09 + +/* + * The return values of MAC operations + */ +enum { + /* + * The requested operation was completed successfully. + * For a transmission request, this value indicates + * a successful transmission. + */ + IEEE802154_SUCCESS = 0x0, + + /* The beacon was lost following a synchronization request. */ + IEEE802154_BEACON_LOSS = 0xe0, + /* + * A transmission could not take place due to activity on the + * channel, i.e., the CSMA-CA mechanism has failed. + */ + IEEE802154_CHNL_ACCESS_FAIL = 0xe1, + /* The GTS request has been denied by the PAN coordinator. */ + IEEE802154_DENINED = 0xe2, + /* The attempt to disable the transceiver has failed. */ + IEEE802154_DISABLE_TRX_FAIL = 0xe3, + /* + * The received frame induces a failed security check according to + * the security suite. + */ + IEEE802154_FAILED_SECURITY_CHECK = 0xe4, + /* + * The frame resulting from secure processing has a length that is + * greater than aMACMaxFrameSize. + */ + IEEE802154_FRAME_TOO_LONG = 0xe5, + /* + * The requested GTS transmission failed because the specified GTS + * either did not have a transmit GTS direction or was not defined. + */ + IEEE802154_INVALID_GTS = 0xe6, + /* + * A request to purge an MSDU from the transaction queue was made using + * an MSDU handle that was not found in the transaction table. + */ + IEEE802154_INVALID_HANDLE = 0xe7, + /* A parameter in the primitive is out of the valid range.*/ + IEEE802154_INVALID_PARAMETER = 0xe8, + /* No acknowledgment was received after aMaxFrameRetries. */ + IEEE802154_NO_ACK = 0xe9, + /* A scan operation failed to find any network beacons.*/ + IEEE802154_NO_BEACON = 0xea, + /* No response data were available following a request. */ + IEEE802154_NO_DATA = 0xeb, + /* The operation failed because a short address was not allocated. */ + IEEE802154_NO_SHORT_ADDRESS = 0xec, + /* + * A receiver enable request was unsuccessful because it could not be + * completed within the CAP. + */ + IEEE802154_OUT_OF_CAP = 0xed, + /* + * A PAN identifier conflict has been detected and communicated to the + * PAN coordinator. + */ + IEEE802154_PANID_CONFLICT = 0xee, + /* A coordinator realignment command has been received. */ + IEEE802154_REALIGMENT = 0xef, + /* The transaction has expired and its information discarded. */ + IEEE802154_TRANSACTION_EXPIRED = 0xf0, + /* There is no capacity to store the transaction. */ + IEEE802154_TRANSACTION_OVERFLOW = 0xf1, + /* + * The transceiver was in the transmitter enabled state when the + * receiver was requested to be enabled. + */ + IEEE802154_TX_ACTIVE = 0xf2, + /* The appropriate key is not available in the ACL. */ + IEEE802154_UNAVAILABLE_KEY = 0xf3, + /* + * A SET/GET request was issued with the identifier of a PIB attribute + * that is not supported. + */ + IEEE802154_UNSUPPORTED_ATTR = 0xf4, + /* + * A request to perform a scan operation failed because the MLME was + * in the process of performing a previously initiated scan operation. + */ + IEEE802154_SCAN_IN_PROGRESS = 0xfc, +}; + + +#endif + + diff --git a/include/net/ieee802154/netdevice.h b/include/net/ieee802154/netdevice.h new file mode 100644 index 00000000000..e2506af3e7c --- /dev/null +++ b/include/net/ieee802154/netdevice.h @@ -0,0 +1,115 @@ +/* + * An interface between IEEE802.15.4 device and rest of the kernel. + * + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Pavel Smolenskiy + * Maxim Gorbachyov + * Maxim Osipov + * Dmitry Eremin-Solenikov + */ + +#ifndef IEEE802154_NETDEVICE_H +#define IEEE802154_NETDEVICE_H + +/* + * A control block of skb passed between the ARPHRD_IEEE802154 device + * and other stack parts. + */ +struct ieee802154_mac_cb { + u8 lqi; + struct ieee802154_addr sa; + struct ieee802154_addr da; + u8 flags; + u8 seq; +}; + +static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb) +{ + return (struct ieee802154_mac_cb *)skb->cb; +} + +#define MAC_CB_FLAG_TYPEMASK ((1 << 3) - 1) + +#define MAC_CB_FLAG_ACKREQ (1 << 3) +#define MAC_CB_FLAG_SECEN (1 << 4) +#define MAC_CB_FLAG_INTRAPAN (1 << 5) + +static inline int mac_cb_is_ackreq(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ; +} + +static inline int mac_cb_is_secen(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN; +} + +static inline int mac_cb_is_intrapan(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_INTRAPAN; +} + +static inline int mac_cb_type(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK; +} + +#define IEEE802154_MAC_SCAN_ED 0 +#define IEEE802154_MAC_SCAN_ACTIVE 1 +#define IEEE802154_MAC_SCAN_PASSIVE 2 +#define IEEE802154_MAC_SCAN_ORPHAN 3 + +/* + * This should be located at net_device->ml_priv + */ +struct ieee802154_mlme_ops { + int (*assoc_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 channel, u8 cap); + int (*assoc_resp)(struct net_device *dev, + struct ieee802154_addr *addr, + u16 short_addr, u8 status); + int (*disassoc_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 reason); + int (*start_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 channel, u8 bcn_ord, u8 sf_ord, + u8 pan_coord, u8 blx, u8 coord_realign); + int (*scan_req)(struct net_device *dev, + u8 type, u32 channels, u8 duration); + + /* + * FIXME: these should become the part of PIB/MIB interface. + * However we still don't have IB interface of any kind + */ + u16 (*get_pan_id)(struct net_device *dev); + u16 (*get_short_addr)(struct net_device *dev); + u8 (*get_dsn)(struct net_device *dev); + u8 (*get_bsn)(struct net_device *dev); +}; + +static inline struct ieee802154_mlme_ops *ieee802154_mlme_ops( + struct net_device *dev) +{ + return dev->ml_priv; +} + +#endif + + diff --git a/net/Kconfig b/net/Kconfig index c19f549c8e7..7051b971067 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -179,6 +179,7 @@ source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/phonet/Kconfig" +source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" diff --git a/net/Makefile b/net/Makefile index 9e00a55a901..ba324aefda7 100644 --- a/net/Makefile +++ b/net/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_NET_9P) += 9p/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif +obj-y += ieee802154/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig new file mode 100644 index 00000000000..1c1de97d264 --- /dev/null +++ b/net/ieee802154/Kconfig @@ -0,0 +1,12 @@ +config IEEE802154 + tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support (EXPERIMENTAL)" + depends on EXPERIMENTAL + ---help--- + IEEE Std 802.15.4 defines a low data rate, low power and low + complexity short range wireless personal area networks. It was + designed to organise networks of sensors, switches, etc automation + devices. Maximum allowed data rate is 250 kb/s and typical personal + operating space around 10m. + + Say Y here to compile LR-WPAN support into the kernel or say M to + compile it as modules. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile new file mode 100644 index 00000000000..7451c7cc8c8 --- /dev/null +++ b/net/ieee802154/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_IEEE802154) += af_802154.o +af_802154-y := af_ieee802154.o raw.o dgram.o + +ccflags-y += -Wall -DDEBUG diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h new file mode 100644 index 00000000000..b1ec5253752 --- /dev/null +++ b/net/ieee802154/af802154.h @@ -0,0 +1,36 @@ +/* + * Internal interfaces for ieee 802.15.4 address family. + * + * Copyright 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#ifndef AF802154_H +#define AF802154_H + +struct sk_buff; +struct net_devce; +extern struct proto ieee802154_raw_prot; +extern struct proto ieee802154_dgram_prot; +void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); +int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); +struct net_device *ieee802154_get_dev(struct net *net, + struct ieee802154_addr *addr); + +#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c new file mode 100644 index 00000000000..882a927cefa --- /dev/null +++ b/net/ieee802154/af_ieee802154.c @@ -0,0 +1,372 @@ +/* + * IEEE802154.4 socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Maxim Gorbachyov + */ + +#include +#include +#include +#include +#include +#include /* For TIOCOUTQ/INQ */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "af802154.h" + +#define DBG_DUMP(data, len) { \ + int i; \ + pr_debug("function: %s: data: len %d:\n", __func__, len); \ + for (i = 0; i < len; i++) {\ + pr_debug("%02x: %02x\n", i, (data)[i]); \ + } \ +} + +/* + * Utility function for families + */ +struct net_device *ieee802154_get_dev(struct net *net, + struct ieee802154_addr *addr) +{ + struct net_device *dev = NULL; + struct net_device *tmp; + u16 pan_id, short_addr; + + switch (addr->addr_type) { + case IEEE802154_ADDR_LONG: + rtnl_lock(); + dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr); + if (dev) + dev_hold(dev); + rtnl_unlock(); + break; + case IEEE802154_ADDR_SHORT: + if (addr->pan_id == 0xffff || + addr->short_addr == IEEE802154_ADDR_UNDEF || + addr->short_addr == 0xffff) + break; + + rtnl_lock(); + + for_each_netdev(net, tmp) { + if (tmp->type != ARPHRD_IEEE802154) + continue; + + pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); + short_addr = + ieee802154_mlme_ops(tmp)->get_short_addr(tmp); + + if (pan_id == addr->pan_id && + short_addr == addr->short_addr) { + dev = tmp; + dev_hold(dev); + break; + } + } + + rtnl_unlock(); + break; + default: + pr_warning("Unsupported ieee802154 address type: %d\n", + addr->addr_type); + break; + } + + return dev; +} + +static int ieee802154_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} +static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->sendmsg(iocb, sk, msg, len); +} + +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sock *sk = sock->sk; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + return sock_no_bind(sock, uaddr, addr_len); +} + +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + + if (uaddr->sa_family == AF_UNSPEC) + return sk->sk_prot->disconnect(sk, flags); + + return sk->sk_prot->connect(sk, uaddr, addr_len); +} + +static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, + unsigned int cmd) +{ + struct ifreq ifr; + int ret = -EINVAL; + struct net_device *dev; + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + dev_load(sock_net(sk), ifr.ifr_name); + dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + if (dev->type == ARPHRD_IEEE802154 || + dev->type == ARPHRD_IEEE802154_PHY) + ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); + + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + ret = -EFAULT; + dev_put(dev); + + return ret; +} + +static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); + case SIOCGIFADDR: + case SIOCSIFADDR: + return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, + cmd); + default: + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +static const struct proto_ops ieee802154_raw_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static const struct proto_ops ieee802154_dgram_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + + +/* + * Create a socket. Initialise the socket, blank the addresses + * set the state. + */ +static int ieee802154_create(struct net *net, struct socket *sock, + int protocol) +{ + struct sock *sk; + int rc; + struct proto *proto; + const struct proto_ops *ops; + + if (net != &init_net) + return -EAFNOSUPPORT; + + switch (sock->type) { + case SOCK_RAW: + proto = &ieee802154_raw_prot; + ops = &ieee802154_raw_ops; + break; + case SOCK_DGRAM: + proto = &ieee802154_dgram_prot; + ops = &ieee802154_dgram_ops; + break; + default: + rc = -ESOCKTNOSUPPORT; + goto out; + } + + rc = -ENOMEM; + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + if (!sk) + goto out; + rc = 0; + + sock->ops = ops; + + sock_init_data(sock, sk); + /* FIXME: sk->sk_destruct */ + sk->sk_family = PF_IEEE802154; + + /* Checksums on by default */ + sock_set_flag(sk, SOCK_ZAPPED); + + if (sk->sk_prot->hash) + sk->sk_prot->hash(sk); + + if (sk->sk_prot->init) { + rc = sk->sk_prot->init(sk); + if (rc) + sk_common_release(sk); + } +out: + return rc; +} + +static struct net_proto_family ieee802154_family_ops = { + .family = PF_IEEE802154, + .create = ieee802154_create, + .owner = THIS_MODULE, +}; + +static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + DBG_DUMP(skb->data, skb->len); + if (!netif_running(dev)) + return -ENODEV; + pr_debug("got frame, type %d, dev %p\n", dev->type, dev); + + if (!net_eq(dev_net(dev), &init_net)) + goto drop; + + ieee802154_raw_deliver(dev, skb); + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + if (skb->pkt_type != PACKET_OTHERHOST) + return ieee802154_dgram_deliver(dev, skb); + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + + +static struct packet_type ieee802154_packet_type = { + .type = __constant_htons(ETH_P_IEEE802154), + .func = ieee802154_rcv, +}; + +static int __init af_ieee802154_init(void) +{ + int rc = -EINVAL; + + rc = proto_register(&ieee802154_raw_prot, 1); + if (rc) + goto out; + + rc = proto_register(&ieee802154_dgram_prot, 1); + if (rc) + goto err_dgram; + + /* Tell SOCKET that we are alive */ + rc = sock_register(&ieee802154_family_ops); + if (rc) + goto err_sock; + dev_add_pack(&ieee802154_packet_type); + + rc = 0; + goto out; + +err_sock: + proto_unregister(&ieee802154_dgram_prot); +err_dgram: + proto_unregister(&ieee802154_raw_prot); +out: + return rc; +} +static void __exit af_ieee802154_remove(void) +{ + dev_remove_pack(&ieee802154_packet_type); + sock_unregister(PF_IEEE802154); + proto_unregister(&ieee802154_dgram_prot); + proto_unregister(&ieee802154_raw_prot); +} + +module_init(af_ieee802154_init); +module_exit(af_ieee802154_remove); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c new file mode 100644 index 00000000000..1f5ea11c2fd --- /dev/null +++ b/net/ieee802154/dgram.c @@ -0,0 +1,394 @@ +/* + * ZigBee socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "af802154.h" + +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + int bound; + struct ieee802154_addr src_addr; + struct ieee802154_addr dst_addr; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->dst_addr.addr_type = IEEE802154_ADDR_LONG; + ro->dst_addr.pan_id = 0xffff; + memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr)); + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + struct net_device *dev; + + ro->bound = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + dev = ieee802154_get_dev(sock_net(sk), &addr->addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + memcpy(&ro->src_addr, &addr->addr, sizeof(struct ieee802154_addr)); + + ro->bound = 1; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = atomic_read(&sk->sk_wmem_alloc); + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) { + /* + * We will only return the amount + * of this packet since that is all + * that will be read. + */ + /* FIXME: parse the header for more correct value */ + amount = skb->len - (3+8+8); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + + } + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + memcpy(&ro->dst_addr, &addr->addr, sizeof(struct ieee802154_addr)); + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + + ro->dst_addr.addr_type = IEEE802154_ADDR_LONG; + memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr)); + + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned mtu; + struct sk_buff *skb; + struct dgram_sock *ro = dgram_sk(sk); + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, LL_RESERVED_SPACE(dev)); + + skb_reset_network_header(skb); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA | MAC_CB_FLAG_ACKREQ; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + skb_reset_mac_header(skb); + + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err < 0) + goto out_skb; + + if (size > mtu) { + pr_debug("size = %u, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_skb; + } + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, int flags, + int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! */ + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto done; + + sock_recv_timestamp(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_inc(&sk->sk_drops); + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, + u16 short_addr, struct dgram_sock *ro) +{ + if (!ro->bound) + return 1; + + if (ro->src_addr.addr_type == IEEE802154_ADDR_LONG && + !memcmp(ro->src_addr.hwaddr, hw_addr, IEEE802154_ADDR_LEN)) + return 1; + + if (ro->src_addr.addr_type == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return 1; + + return 0; +} + +int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + struct hlist_node *node; + int ret = NET_RX_SUCCESS; + u16 pan_id, short_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + + read_lock(&dgram_lock); + sk_for_each(sk, node, &dgram_head) { + if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) + dgram_rcv_skb(prev, skb); + else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, +}; + diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c new file mode 100644 index 00000000000..8b892e06049 --- /dev/null +++ b/net/ieee802154/raw.c @@ -0,0 +1,254 @@ +/* + * Raw IEEE 802.15.4 sockets + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#include +#include +#include +#include +#include +#include + +#include "af802154.h" + +static HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + dev = ieee802154_get_dev(sock_net(sk), &addr->addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154_PHY && + dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size) +{ + struct net_device *dev; + unsigned mtu; + struct sk_buff *skb; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %u, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, LL_RESERVED_SPACE(dev)); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto done; + + sock_recv_timestamp(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_inc(&sk->sk_drops); + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + + +void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + struct hlist_node *node; + + read_lock(&raw_lock); + sk_for_each(sk, node, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == dev->ifindex) { + + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, +}; + -- cgit v1.2.3-70-g09d2 From 2c21d11518b688cd4c8e7ddfcd4ba41482ad075b Mon Sep 17 00:00:00 2001 From: Sergey Lapin Date: Mon, 8 Jun 2009 12:18:49 +0000 Subject: net: add NL802154 interface for configuration of 802.15.4 devices Add a netlink interface for configuration of IEEE 802.15.4 device. Also this interface specifies events notification sent by devices towards higher layers. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Sergey Lapin Signed-off-by: David S. Miller --- include/linux/nl802154.h | 119 +++++++++ include/net/ieee802154/nl802154.h | 41 +++ net/ieee802154/Makefile | 3 +- net/ieee802154/netlink.c | 523 ++++++++++++++++++++++++++++++++++++++ net/ieee802154/nl_policy.c | 52 ++++ 5 files changed, 737 insertions(+), 1 deletion(-) create mode 100644 include/linux/nl802154.h create mode 100644 include/net/ieee802154/nl802154.h create mode 100644 net/ieee802154/netlink.c create mode 100644 net/ieee802154/nl_policy.c (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h new file mode 100644 index 00000000000..2cda00ccfcc --- /dev/null +++ b/include/linux/nl802154.h @@ -0,0 +1,119 @@ +/* + * nl802154.h + * + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef NL802154_H +#define NL802154_H + +#define IEEE802154_NL_NAME "802.15.4 MAC" +#define IEEE802154_MCAST_COORD_NAME "coordinator" +#define IEEE802154_MCAST_BEACON_NAME "beacon" + +enum { + __IEEE802154_ATTR_INVALID, + + IEEE802154_ATTR_DEV_NAME, + IEEE802154_ATTR_DEV_INDEX, + + IEEE802154_ATTR_STATUS, + + IEEE802154_ATTR_SHORT_ADDR, + IEEE802154_ATTR_HW_ADDR, + IEEE802154_ATTR_PAN_ID, + + IEEE802154_ATTR_CHANNEL, + + IEEE802154_ATTR_COORD_SHORT_ADDR, + IEEE802154_ATTR_COORD_HW_ADDR, + IEEE802154_ATTR_COORD_PAN_ID, + + IEEE802154_ATTR_SRC_SHORT_ADDR, + IEEE802154_ATTR_SRC_HW_ADDR, + IEEE802154_ATTR_SRC_PAN_ID, + + IEEE802154_ATTR_DEST_SHORT_ADDR, + IEEE802154_ATTR_DEST_HW_ADDR, + IEEE802154_ATTR_DEST_PAN_ID, + + IEEE802154_ATTR_CAPABILITY, + IEEE802154_ATTR_REASON, + IEEE802154_ATTR_SCAN_TYPE, + IEEE802154_ATTR_CHANNELS, + IEEE802154_ATTR_DURATION, + IEEE802154_ATTR_ED_LIST, + IEEE802154_ATTR_BCN_ORD, + IEEE802154_ATTR_SF_ORD, + IEEE802154_ATTR_PAN_COORD, + IEEE802154_ATTR_BAT_EXT, + IEEE802154_ATTR_COORD_REALIGN, + IEEE802154_ATTR_SEC, + + __IEEE802154_ATTR_MAX, +}; + +#define IEEE802154_ATTR_MAX (__IEEE802154_ATTR_MAX - 1) + +extern struct nla_policy ieee802154_policy[]; + +/* commands */ +/* REQ should be responded with CONF + * and INDIC with RESP + */ +enum { + __IEEE802154_COMMAND_INVALID, + + IEEE802154_ASSOCIATE_REQ, + IEEE802154_ASSOCIATE_CONF, + IEEE802154_DISASSOCIATE_REQ, + IEEE802154_DISASSOCIATE_CONF, + IEEE802154_GET_REQ, + IEEE802154_GET_CONF, + IEEE802154_RESET_REQ, + IEEE802154_RESET_CONF, + IEEE802154_SCAN_REQ, + IEEE802154_SCAN_CONF, + IEEE802154_SET_REQ, + IEEE802154_SET_CONF, + IEEE802154_START_REQ, + IEEE802154_START_CONF, + IEEE802154_SYNC_REQ, + IEEE802154_POLL_REQ, + IEEE802154_POLL_CONF, + + IEEE802154_ASSOCIATE_INDIC, + IEEE802154_ASSOCIATE_RESP, + IEEE802154_DISASSOCIATE_INDIC, + IEEE802154_BEACON_NOTIFY_INDIC, + IEEE802154_ORPHAN_INDIC, + IEEE802154_ORPHAN_RESP, + IEEE802154_COMM_STATUS_INDIC, + IEEE802154_SYNC_LOSS_INDIC, + + IEEE802154_GTS_REQ, /* Not supported yet */ + IEEE802154_GTS_INDIC, /* Not supported yet */ + IEEE802154_GTS_CONF, /* Not supported yet */ + IEEE802154_RX_ENABLE_REQ, /* Not supported yet */ + IEEE802154_RX_ENABLE_CONF, /* Not supported yet */ + + __IEEE802154_CMD_MAX, +}; + +#define IEEE802154_CMD_MAX (__IEEE802154_CMD_MAX - 1) + +#endif diff --git a/include/net/ieee802154/nl802154.h b/include/net/ieee802154/nl802154.h new file mode 100644 index 00000000000..78efcdf52b5 --- /dev/null +++ b/include/net/ieee802154/nl802154.h @@ -0,0 +1,41 @@ +/* + * nl802154.h + * + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef IEEE802154_NL_H +#define IEEE802154_NL_H + +struct net_device; +struct ieee802154_addr; + +int ieee802154_nl_assoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 cap); +int ieee802154_nl_assoc_confirm(struct net_device *dev, + u16 short_addr, u8 status); +int ieee802154_nl_disassoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 reason); +int ieee802154_nl_disassoc_confirm(struct net_device *dev, + u8 status); +int ieee802154_nl_scan_confirm(struct net_device *dev, + u8 status, u8 scan_type, u32 unscanned, + u8 *edl/*, struct list_head *pan_desc_list */); +int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid, + u16 coord_addr); + +#endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 7451c7cc8c8..f99338a2610 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,4 +1,5 @@ -obj-$(CONFIG_IEEE802154) += af_802154.o +obj-$(CONFIG_IEEE802154) += nl802154.o af_802154.o +nl802154-y := netlink.o nl_policy.o af_802154-y := af_ieee802154.o raw.o dgram.o ccflags-y += -Wall -DDEBUG diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c new file mode 100644 index 00000000000..105ad10876a --- /dev/null +++ b/net/ieee802154/netlink.c @@ -0,0 +1,523 @@ +/* + * Netlink inteface for IEEE 802.15.4 stack + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int ieee802154_seq_num; + +static struct genl_family ieee802154_coordinator_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = IEEE802154_NL_NAME, + .version = 1, + .maxattr = IEEE802154_ATTR_MAX, +}; + +static struct genl_multicast_group ieee802154_coord_mcgrp = { + .name = IEEE802154_MCAST_COORD_NAME, +}; + +static struct genl_multicast_group ieee802154_beacon_mcgrp = { + .name = IEEE802154_MCAST_BEACON_NAME, +}; + +/* Requests to userspace */ +static struct sk_buff *ieee802154_nl_create(int flags, u8 req) +{ + void *hdr; + struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + + if (!msg) + return NULL; + + hdr = genlmsg_put(msg, 0, ieee802154_seq_num++, + &ieee802154_coordinator_family, flags, req); + if (!hdr) { + nlmsg_free(msg); + return NULL; + } + + return msg; +} + +static int ieee802154_nl_finish(struct sk_buff *msg) +{ + /* XXX: nlh is right at the start of msg */ + void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + + if (!genlmsg_end(msg, hdr)) + goto out; + + return genlmsg_multicast(msg, 0, ieee802154_coord_mcgrp.id, + GFP_ATOMIC); +out: + nlmsg_free(msg); + return -ENOBUFS; +} + +int ieee802154_nl_assoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 cap) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + if (addr->addr_type != IEEE802154_ADDR_LONG) { + pr_err("%s: received non-long source address!\n", __func__); + return -EINVAL; + } + + msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, + addr->hwaddr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_CAPABILITY, cap); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_assoc_indic); + +int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, + u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr); + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_assoc_confirm); + +int ieee802154_nl_disassoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 reason) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + if (addr->addr_type == IEEE802154_ADDR_LONG) + NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, + addr->hwaddr); + else + NLA_PUT_U16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR, + addr->short_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_REASON, reason); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_disassoc_indic); + +int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm); + +int ieee802154_nl_beacon_indic(struct net_device *dev, + u16 panid, u16 coord_addr) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_BEACON_NOTIFY_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_beacon_indic); + +int ieee802154_nl_scan_confirm(struct net_device *dev, + u8 status, u8 scan_type, u32 unscanned, + u8 *edl/* , struct list_head *pan_desc_list */) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_SCAN_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type); + NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned); + + if (edl) + NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_scan_confirm); + +/* Requests from userspace */ +static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) +{ + struct net_device *dev; + + if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { + char name[IFNAMSIZ + 1]; + nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], + sizeof(name)); + dev = dev_get_by_name(&init_net, name); + } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) + dev = dev_get_by_index(&init_net, + nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); + else + return NULL; + + if (dev->type != ARPHRD_IEEE802154) { + dev_put(dev); + return NULL; + } + + return dev; +} + +static int ieee802154_associate_req(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + int ret = -EINVAL; + + if (!info->attrs[IEEE802154_ATTR_CHANNEL] || + !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || + (!info->attrs[IEEE802154_ATTR_COORD_HW_ADDR] && + !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_CAPABILITY]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, + info->attrs[IEEE802154_ATTR_COORD_HW_ADDR], + IEEE802154_ADDR_LEN); + } else { + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); + } + addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + + ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, + nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), + nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); + + dev_put(dev); + return ret; +} + +static int ieee802154_associate_resp(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + int ret = -EINVAL; + + if (!info->attrs[IEEE802154_ATTR_STATUS] || + !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], + IEEE802154_ADDR_LEN); + addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + + + ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, + nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), + nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); + + dev_put(dev); + return ret; +} + +static int ieee802154_disassociate_req(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + int ret = -EINVAL; + + if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_REASON]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, + info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], + IEEE802154_ADDR_LEN); + } else { + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); + } + addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + + ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, + nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); + + dev_put(dev); + return ret; +} + +/* + * PANid, channel, beacon_order = 15, superframe_order = 15, + * PAN_coordinator, battery_life_extension = 0, + * coord_realignment = 0, security_enable = 0 +*/ +static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + + u8 channel, bcn_ord, sf_ord; + int pan_coord, blx, coord_realign; + int ret; + + if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || + !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || + !info->attrs[IEEE802154_ATTR_CHANNEL] || + !info->attrs[IEEE802154_ATTR_BCN_ORD] || + !info->attrs[IEEE802154_ATTR_SF_ORD] || + !info->attrs[IEEE802154_ATTR_PAN_COORD] || + !info->attrs[IEEE802154_ATTR_BAT_EXT] || + !info->attrs[IEEE802154_ATTR_COORD_REALIGN] + ) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); + addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + + channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]); + bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]); + sf_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_SF_ORD]); + pan_coord = nla_get_u8(info->attrs[IEEE802154_ATTR_PAN_COORD]); + blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); + coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); + + ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, + bcn_ord, sf_ord, pan_coord, blx, coord_realign); + + dev_put(dev); + return ret; +} + +static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + int ret; + u8 type; + u32 channels; + u8 duration; + + if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || + !info->attrs[IEEE802154_ATTR_CHANNELS] || + !info->attrs[IEEE802154_ATTR_DURATION]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); + channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); + duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); + + ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, + duration); + + dev_put(dev); + return ret; +} + +#define IEEE802154_OP(_cmd, _func) \ + { \ + .cmd = _cmd, \ + .policy = ieee802154_policy, \ + .doit = _func, \ + .dumpit = NULL, \ + .flags = GENL_ADMIN_PERM, \ + } + +static struct genl_ops ieee802154_coordinator_ops[] = { + IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), + IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), + IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req), + IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req), + IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), +}; + +static int __init ieee802154_nl_init(void) +{ + int rc; + int i; + + rc = genl_register_family(&ieee802154_coordinator_family); + if (rc) + goto fail; + + rc = genl_register_mc_group(&ieee802154_coordinator_family, + &ieee802154_coord_mcgrp); + if (rc) + goto fail; + + rc = genl_register_mc_group(&ieee802154_coordinator_family, + &ieee802154_beacon_mcgrp); + if (rc) + goto fail; + + + for (i = 0; i < ARRAY_SIZE(ieee802154_coordinator_ops); i++) { + rc = genl_register_ops(&ieee802154_coordinator_family, + &ieee802154_coordinator_ops[i]); + if (rc) + goto fail; + } + + return 0; + +fail: + genl_unregister_family(&ieee802154_coordinator_family); + return rc; +} +module_init(ieee802154_nl_init); + +static void __exit ieee802154_nl_exit(void) +{ + genl_unregister_family(&ieee802154_coordinator_family); +} +module_exit(ieee802154_nl_exit); + diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c new file mode 100644 index 00000000000..c7d71d1adca --- /dev/null +++ b/net/ieee802154/nl_policy.c @@ -0,0 +1,52 @@ +/* + * nl802154.h + * + * Copyright (C) 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include + +#define NLA_HW_ADDR NLA_U64 + +struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { + [IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, }, + [IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, }, + + [IEEE802154_ATTR_STATUS] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, }, + [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, + [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, + [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, + [IEEE802154_ATTR_SRC_SHORT_ADDR] = { .type = NLA_U16, }, + [IEEE802154_ATTR_SRC_HW_ADDR] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_SRC_PAN_ID] = { .type = NLA_U16, }, + [IEEE802154_ATTR_DEST_SHORT_ADDR] = { .type = NLA_U16, }, + [IEEE802154_ATTR_DEST_HW_ADDR] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_DEST_PAN_ID] = { .type = NLA_U16, }, + + [IEEE802154_ATTR_CAPABILITY] = { .type = NLA_U8, }, + [IEEE802154_ATTR_REASON] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SCAN_TYPE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CHANNELS] = { .type = NLA_U32, }, + [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, + [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, +}; -- cgit v1.2.3-70-g09d2 From 18760f1e74e8dfe8f30d4891e66163d1e6feb893 Mon Sep 17 00:00:00 2001 From: Chaitanya Lala Date: Mon, 8 Jun 2009 14:28:54 +0000 Subject: e1000e: Expose MDI-X status via ethtool change Ethtool is a standard way of getting information about ethernet interfaces. We enhance ethtool kernel interface & e1000e to make the MDI-X status readable via ethtool in userspace. Signed-off-by: Chaitanya Lala Signed-off-by: Arthur Jones Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ethtool.c | 9 +++++++++ include/linux/ethtool.h | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index 3d643561752..1bf4d2a5d34 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -167,6 +167,15 @@ static int e1000_get_settings(struct net_device *netdev, ecmd->autoneg = ((hw->phy.media_type == e1000_media_type_fiber) || hw->mac.autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if ((hw->phy.media_type == e1000_media_type_copper) && + !hw->mac.get_link_status) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + return 0; } diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 380b04272bf..9b660bd2e2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -30,7 +30,8 @@ struct ethtool_cmd { __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ __u16 speed_hi; - __u16 reserved2; + __u8 eth_tp_mdix; + __u8 reserved2; __u32 lp_advertising; /* Features the link partner advertises */ __u32 reserved[2]; }; @@ -632,6 +633,11 @@ struct ethtool_ops { #define AUTONEG_DISABLE 0x00 #define AUTONEG_ENABLE 0x01 +/* Mode MDI or MDI-X */ +#define ETH_TP_MDI_INVALID 0x00 +#define ETH_TP_MDI 0x01 +#define ETH_TP_MDI_X 0x02 + /* Wake-On-Lan options. */ #define WAKE_PHY (1 << 0) #define WAKE_UCAST (1 << 1) -- cgit v1.2.3-70-g09d2 From 1d589bb16b825b3a7b4edd34d997f1f1f953033d Mon Sep 17 00:00:00 2001 From: john cooper Date: Tue, 9 Jun 2009 14:41:40 +0200 Subject: Add serial number support for virtio_blk, V4a This patch extracts the opaque data from pci i/o region 0 via the added VIRTIO_BLK_F_IDENTIFY field. By convention this data takes the form of that returned by an ATA IDENTIFY DEVICE command, however the driver (except for structure size) makes no interpretation of the data. The structure data is copied wholesale to userspace via a HDIO_GET_IDENTITY ioctl command (eg: hdparm -i ). Signed-off-by: john cooper Signed-off-by: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 37 ++++++++++++++++++++++++++++++++++--- include/linux/virtio_blk.h | 4 ++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c4845b16946..c0facaa55cf 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -171,11 +171,43 @@ static void do_virtblk_request(struct request_queue *q) vblk->vq->vq_ops->kick(vblk->vq); } +/* return ATA identify data + */ +static int virtblk_identify(struct gendisk *disk, void *argp) +{ + struct virtio_blk *vblk = disk->private_data; + void *opaque; + int err = -ENOMEM; + + opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL); + if (!opaque) + goto out; + + err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY, + offsetof(struct virtio_blk_config, identify), opaque, + VIRTIO_BLK_ID_BYTES); + + if (err) + goto out_kfree; + + if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES)) + err = -EFAULT; + +out_kfree: + kfree(opaque); +out: + return err; +} + static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long data) { struct gendisk *disk = bdev->bd_disk; struct virtio_blk *vblk = disk->private_data; + void __user *argp = (void __user *)data; + + if (cmd == HDIO_GET_IDENTITY) + return virtblk_identify(disk, argp); /* * Only allow the generic SCSI ioctls if the host can support it. @@ -183,8 +215,7 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) return -ENOIOCTLCMD; - return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, - (void __user *)data); + return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp); } /* We provide getgeo only to please some old bootloader/partitioning tools */ @@ -390,7 +421,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, - VIRTIO_BLK_F_SCSI, + VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY }; static struct virtio_driver virtio_blk = { diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 4dbcbc1c348..be7d255fc7c 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -16,6 +16,9 @@ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_IDENTIFY 8 /* ATA IDENTIFY supported */ + +#define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ struct virtio_blk_config { @@ -33,6 +36,7 @@ struct virtio_blk_config } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ __u32 blk_size; + __u8 identify[VIRTIO_BLK_ID_BYTES]; } __attribute__((packed)); /* These two define direction. */ -- cgit v1.2.3-70-g09d2 From a31e1ffd2231b8fdf7eda9ed750a4a0df9bcad4e Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Tue, 9 Jun 2009 15:16:34 +0200 Subject: netfilter: xt_socket: added new revision of the 'socket' match supporting flags If the XT_SOCKET_TRANSPARENT flag is set, enabled 'transparent' socket option is required for the socket to be matched. Signed-off-by: Laszlo Attila Toth Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_socket.h | 12 +++++++ net/netfilter/xt_socket.c | 63 ++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 include/linux/netfilter/xt_socket.h (limited to 'include') diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h new file mode 100644 index 00000000000..6f475b8ff34 --- /dev/null +++ b/include/linux/netfilter/xt_socket.h @@ -0,0 +1,12 @@ +#ifndef _XT_SOCKET_H +#define _XT_SOCKET_H + +enum { + XT_SOCKET_TRANSPARENT = 1 << 0, +}; + +struct xt_socket_mtinfo1 { + __u8 flags; +}; + +#endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1acc089be7e..ebf00ad5b19 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,6 +22,8 @@ #include #include +#include + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #define XT_SOCKET_HAVE_CONNTRACK 1 #include @@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +socket_match(const struct sk_buff *skb, const struct xt_match_param *par, + const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); + bool wildcard; + bool transparent = true; + + /* Ignore sockets listening on INADDR_ANY */ + wildcard = (sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->rcv_saddr == 0); + + /* Ignore non-transparent sockets, + if XT_SOCKET_TRANSPARENT is used */ + if (info && info->flags & XT_SOCKET_TRANSPARENT) + transparent = ((sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->transparent) || + (sk->sk_state == TCP_TIME_WAIT && + inet_twsk(sk)->tw_transparent)); nf_tproxy_put_sock(sk); - if (wildcard) + + if (wildcard || !transparent) sk = NULL; } @@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (sk != NULL); } -static struct xt_match socket_mt_reg __read_mostly = { - .name = "socket", - .family = AF_INET, - .match = socket_mt, - .hooks = 1 << NF_INET_PRE_ROUTING, - .me = THIS_MODULE, +static bool +socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, NULL); +} + +static bool +socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, par->matchinfo); +} + +static struct xt_match socket_mt_reg[] __read_mostly = { + { + .name = "socket", + .revision = 0, + .family = NFPROTO_IPV4, + .match = socket_mt_v0, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, + { + .name = "socket", + .revision = 1, + .family = NFPROTO_IPV4, + .match = socket_mt_v1, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, }; static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); - return xt_register_match(&socket_mt_reg); + return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } static void __exit socket_mt_exit(void) { - xt_unregister_match(&socket_mt_reg); + xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } module_init(socket_mt_init); -- cgit v1.2.3-70-g09d2 From 43514774ff40c4fbe0cbbd3d8293a359f1a9fe71 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Jun 2009 18:14:41 -0700 Subject: [SCSI] iscsi class: Add new NETLINK_ISCSI messages for cnic/bnx2i driver. Add ISCSI_NETLINK messages for iSCSI NICs to get information such as path from userspace. Original iscsid messages are now always sent as multicast to group 1. The new messages are sent to group 2. The multicast changes were made by Mike Christie. Signed-off-by: Michael Chan Signed-off-by: Benjamin Li Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_iscsi.c | 122 +++++++++++++++++++++++++----------- include/scsi/iscsi_if.h | 42 +++++++++++++ include/scsi/scsi_transport_iscsi.h | 5 ++ 3 files changed, 133 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index d69a53aa406..f3e664628d7 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -37,7 +37,6 @@ #define ISCSI_TRANSPORT_VERSION "2.0-870" struct iscsi_internal { - int daemon_pid; struct scsi_transport_template t; struct iscsi_transport *iscsi_transport; struct list_head list; @@ -938,23 +937,9 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt) } static int -iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp) +iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp) { - return netlink_broadcast(nls, skb, 0, 1, gfp); -} - -static int -iscsi_unicast_skb(struct sk_buff *skb, int pid) -{ - int rc; - - rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); - if (rc < 0) { - printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc); - return rc; - } - - return 0; + return nlmsg_multicast(nls, skb, 0, group, gfp); } int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, @@ -980,7 +965,7 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, return -ENOMEM; } - nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0); + nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = NLMSG_DATA(nlh); memset(ev, 0, sizeof(*ev)); ev->transport_handle = iscsi_handle(conn->transport); @@ -991,10 +976,45 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, memcpy(pdu, hdr, sizeof(struct iscsi_hdr)); memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size); - return iscsi_unicast_skb(skb, priv->daemon_pid); + return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iscsi_recv_pdu); +int iscsi_offload_mesg(struct Scsi_Host *shost, + struct iscsi_transport *transport, uint32_t type, + char *data, uint16_t data_size) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + struct iscsi_uevent *ev; + int len = NLMSG_SPACE(sizeof(*ev) + data_size); + + skb = alloc_skb(len, GFP_NOIO); + if (!skb) { + printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); + return -ENOMEM; + } + + nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); + ev = NLMSG_DATA(nlh); + memset(ev, 0, sizeof(*ev)); + ev->type = type; + ev->transport_handle = iscsi_handle(transport); + switch (type) { + case ISCSI_KEVENT_PATH_REQ: + ev->r.req_path.host_no = shost->host_no; + break; + case ISCSI_KEVENT_IF_DOWN: + ev->r.notify_if_down.host_no = shost->host_no; + break; + } + + memcpy((char *)ev + sizeof(*ev), data, data_size); + + return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO); +} +EXPORT_SYMBOL_GPL(iscsi_offload_mesg); + void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; @@ -1014,7 +1034,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) return; } - nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0); + nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = NLMSG_DATA(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; @@ -1022,7 +1042,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) ev->r.connerror.cid = conn->cid; ev->r.connerror.sid = iscsi_conn_get_sid(conn); - iscsi_broadcast_skb(skb, GFP_ATOMIC); + iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n", error); @@ -1030,8 +1050,8 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) EXPORT_SYMBOL_GPL(iscsi_conn_error_event); static int -iscsi_if_send_reply(int pid, int seq, int type, int done, int multi, - void *payload, int size) +iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi, + void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; @@ -1045,10 +1065,10 @@ iscsi_if_send_reply(int pid, int seq, int type, int done, int multi, return -ENOMEM; } - nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0); + nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0); nlh->nlmsg_flags = flags; memcpy(NLMSG_DATA(nlh), payload, size); - return iscsi_unicast_skb(skb, pid); + return iscsi_multicast_skb(skb, group, GFP_ATOMIC); } static int @@ -1085,7 +1105,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) return -ENOMEM; } - nlhstat = __nlmsg_put(skbstat, priv->daemon_pid, 0, 0, + nlhstat = __nlmsg_put(skbstat, 0, 0, 0, (len - sizeof(*nlhstat)), 0); evstat = NLMSG_DATA(nlhstat); memset(evstat, 0, sizeof(*evstat)); @@ -1109,7 +1129,8 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) skb_trim(skbstat, NLMSG_ALIGN(actual_size)); nlhstat->nlmsg_len = actual_size; - err = iscsi_unicast_skb(skbstat, priv->daemon_pid); + err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID, + GFP_ATOMIC); } while (err < 0 && err != -ECONNREFUSED); return err; @@ -1143,7 +1164,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, return -ENOMEM; } - nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0); + nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = NLMSG_DATA(nlh); ev->transport_handle = iscsi_handle(session->transport); @@ -1172,7 +1193,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ - rc = iscsi_broadcast_skb(skb, GFP_KERNEL); + rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL); if (rc == -ESRCH) iscsi_cls_session_printk(KERN_ERR, session, "Cannot notify userspace of session " @@ -1393,7 +1414,31 @@ iscsi_set_host_param(struct iscsi_transport *transport, } static int -iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) +{ + struct Scsi_Host *shost; + struct iscsi_path *params; + int err; + + if (!transport->set_path) + return -ENOSYS; + + shost = scsi_host_lookup(ev->u.set_path.host_no); + if (!shost) { + printk(KERN_ERR "set path could not find host no %u\n", + ev->u.set_path.host_no); + return -ENODEV; + } + + params = (struct iscsi_path *)((char *)ev + sizeof(*ev)); + err = transport->set_path(shost, params); + + scsi_host_put(shost); + return err; +} + +static int +iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; struct iscsi_uevent *ev = NLMSG_DATA(nlh); @@ -1403,6 +1448,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep = NULL; + if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE) + *group = ISCSI_NL_GRP_UIP; + else + *group = ISCSI_NL_GRP_ISCSID; + priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle)); if (!priv) return -EINVAL; @@ -1411,8 +1461,6 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!try_module_get(transport->owner)) return -EINVAL; - priv->daemon_pid = NETLINK_CREDS(skb)->pid; - switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, @@ -1506,6 +1554,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case ISCSI_UEVENT_SET_HOST_PARAM: err = iscsi_set_host_param(transport, ev); break; + case ISCSI_UEVENT_PATH_UPDATE: + err = iscsi_set_path(transport, ev); + break; default: err = -ENOSYS; break; @@ -1528,6 +1579,7 @@ iscsi_if_rx(struct sk_buff *skb) uint32_t rlen; struct nlmsghdr *nlh; struct iscsi_uevent *ev; + uint32_t group; nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || @@ -1540,7 +1592,7 @@ iscsi_if_rx(struct sk_buff *skb) if (rlen > skb->len) rlen = skb->len; - err = iscsi_if_recv_msg(skb, nlh); + err = iscsi_if_recv_msg(skb, nlh, &group); if (err) { ev->type = ISCSI_KEVENT_IF_ERROR; ev->iferror = err; @@ -1554,8 +1606,7 @@ iscsi_if_rx(struct sk_buff *skb) */ if (ev->type == ISCSI_UEVENT_GET_STATS && !err) break; - err = iscsi_if_send_reply( - NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq, + err = iscsi_if_send_reply(group, nlh->nlmsg_seq, nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); } while (err < 0 && err != -ECONNREFUSED); skb_pull(skb, rlen); @@ -1803,7 +1854,6 @@ iscsi_register_transport(struct iscsi_transport *tt) if (!priv) return NULL; INIT_LIST_HEAD(&priv->list); - priv->daemon_pid = -1; priv->iscsi_transport = tt; priv->t.user_scan = iscsi_user_scan; priv->t.create_work_queue = 1; diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 2c1a4af9eaf..4426f00da5f 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -22,6 +22,11 @@ #define ISCSI_IF_H #include +#include +#include + +#define ISCSI_NL_GRP_ISCSID 1 +#define ISCSI_NL_GRP_UIP 2 #define UEVENT_BASE 10 #define KEVENT_BASE 100 @@ -53,6 +58,8 @@ enum iscsi_uevent_e { ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST = UEVENT_BASE + 19, + ISCSI_UEVENT_PATH_UPDATE = UEVENT_BASE + 20, + /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, ISCSI_KEVENT_CONN_ERROR = KEVENT_BASE + 2, @@ -60,6 +67,9 @@ enum iscsi_uevent_e { ISCSI_KEVENT_DESTROY_SESSION = KEVENT_BASE + 4, ISCSI_KEVENT_UNBIND_SESSION = KEVENT_BASE + 5, ISCSI_KEVENT_CREATE_SESSION = KEVENT_BASE + 6, + + ISCSI_KEVENT_PATH_REQ = KEVENT_BASE + 7, + ISCSI_KEVENT_IF_DOWN = KEVENT_BASE + 8, }; enum iscsi_tgt_dscvr { @@ -159,6 +169,9 @@ struct iscsi_uevent { uint32_t param; /* enum iscsi_host_param */ uint32_t len; } set_host_param; + struct msg_set_path { + uint32_t host_no; + } set_path; } u; union { /* messages k -> u */ @@ -192,9 +205,38 @@ struct iscsi_uevent { struct msg_transport_connect_ret { uint64_t handle; } ep_connect_ret; + struct msg_req_path { + uint32_t host_no; + } req_path; + struct msg_notify_if_down { + uint32_t host_no; + } notify_if_down; } r; } __attribute__ ((aligned (sizeof(uint64_t)))); +/* + * To keep the struct iscsi_uevent size the same for userspace code + * compatibility, the main structure for ISCSI_UEVENT_PATH_UPDATE and + * ISCSI_KEVENT_PATH_REQ is defined separately and comes after the + * struct iscsi_uevent in the NETLINK_ISCSI message. + */ +struct iscsi_path { + uint64_t handle; + uint8_t mac_addr[6]; + uint8_t mac_addr_old[6]; + uint32_t ip_addr_len; /* 4 or 16 */ + union { + struct in_addr v4_addr; + struct in6_addr v6_addr; + } src; + union { + struct in_addr v4_addr; + struct in6_addr v6_addr; + } dst; + uint16_t vlan_id; + uint16_t pmtu; +} __attribute__ ((aligned (sizeof(uint64_t)))); + /* * Common error codes */ diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 8cb7a31d996..349c7f30720 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -133,6 +133,7 @@ struct iscsi_transport { void (*ep_disconnect) (struct iscsi_endpoint *ep); int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, uint32_t enable, struct sockaddr *dst_addr); + int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params); }; /* @@ -149,6 +150,10 @@ extern void iscsi_conn_error_event(struct iscsi_cls_conn *conn, extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); +extern int iscsi_offload_mesg(struct Scsi_Host *shost, + struct iscsi_transport *transport, uint32_t type, + char *data, uint16_t data_size); + struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ void *dd_data; /* LLD private data */ -- cgit v1.2.3-70-g09d2 From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Jun 2009 13:43:05 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT() TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions ... Cons: - no dev_t info for the output of plug, unplug_timer and unplug_io events. no dev_t info for getrq and sleeprq events if bio == NULL. no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL. This is mainly because we can't get the deivce from a request queue. But this may change in the future. - A packet command is converted to a string in TP_assign, not TP_print. While blktrace do the convertion just before output. Since pc requests should be rather rare, this is not a big issue. - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT has a unique format, which means we have some unused data in a trace entry. The overhead is minimized by using __dynamic_array() instead of __array(). I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing: dd dd + ioctl blktrace dd + TRACE_EVENT (splice) 1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s 2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s 3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s So the overhead of tracing is very small, and no regression when using those trace events vs blktrace. And the binary output of TRACE_EVENT is much smaller than blktrace: # ls -l -h -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out Following are some comparisons between TRACE_EVENT and blktrace: plug: kjournald-480 [000] 303.084981: block_plug: [kjournald] kjournald-480 [000] 303.084981: 8,0 P N [kjournald] unplug_io: kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1 kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1 remap: kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384 kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384 bio_backmerge: kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald] kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald] getrq: kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald] kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald] bash-2066 [001] 1072.953770: 8,0 G N [bash] bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash] rq_complete: konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0] konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0] ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0] ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0] rq_insert: kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald] kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald] Changelog from v2 -> v3: - use the newly introduced __dynamic_array(). Changelog from v1 -> v2: - use __string() instead of __array() to minimize the memory required to store hex dump of rq->cmd(). - support large pc requests. - add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT. - some cleanups. Signed-off-by: Li Zefan LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- block/blk-core.c | 16 +- block/elevator.c | 8 +- drivers/md/dm.c | 5 +- fs/bio.c | 3 +- include/linux/blktrace_api.h | 13 ++ include/trace/block.h | 76 ------- include/trace/events/block.h | 483 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/Makefile | 5 +- kernel/trace/blktrace.c | 78 ++++++- mm/bounce.c | 5 +- 10 files changed, 588 insertions(+), 104 deletions(-) delete mode 100644 include/trace/block.h create mode 100644 include/trace/events/block.h (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 1306de9cce0..9475bf99b89 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,22 +28,14 @@ #include #include #include -#include + +#define CREATE_TRACE_POINTS +#include #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/block/elevator.c b/block/elevator.c index 7073a907257..e220f0c543e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,17 +33,16 @@ #include #include #include -#include #include #include +#include + #include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e2ee4a79ea2..3fd8b1e6548 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #define DM_MSG_PREFIX "core" @@ -53,8 +54,6 @@ struct dm_target_io { union map_info info; }; -DEFINE_TRACE(block_bio_complete); - /* * For request-based dm. * One of these is allocated per request. diff --git a/fs/bio.c b/fs/bio.c index 98711647ece..740699c4f90 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -26,10 +26,9 @@ #include #include #include -#include #include /* for struct sg_iovec */ -DEFINE_TRACE(block_split); +#include /* * Test patch to inline a certain number of bi_io_vec's inside the bio diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 82b4636030e..c7ec31dd04c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ +#ifdef CONFIG_EVENT_TRACING + +static inline int blk_cmd_buf_len(struct request *rq) +{ + return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; +} + +extern void blk_dump_cmd(char *buf, struct request *rq); +extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); + +#endif /* CONFIG_EVENT_TRACING */ + #endif /* __KERNEL__ */ #endif diff --git a/include/trace/block.h b/include/trace/block.h deleted file mode 100644 index 5b12efa096b..00000000000 --- a/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include -#include - -DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from), - TP_ARGS(q, bio, dev, from)); - -#endif diff --git a/include/trace/events/block.h b/include/trace/events/block.h new file mode 100644 index 00000000000..a99d1e565bb --- /dev/null +++ b/include/trace/events/block.h @@ -0,0 +1,483 @@ +#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BLOCK_H + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + +TRACE_EVENT(block_rq_abort, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_insert, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_issue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_requeue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); +TRACE_EVENT(block_bio_bounce, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_complete, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __field( int, error ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->error) +); + +TRACE_EVENT(block_bio_backmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_frontmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_queue, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_getrq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_sleeprq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_plug, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s]", __entry->comm) +); + +TRACE_EVENT(block_unplug_timer, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_unplug_io, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_split, + + TP_PROTO(struct request_queue *q, struct bio *bio, + unsigned int new_sector), + + TP_ARGS(q, bio, new_sector), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, new_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->new_sector = new_sector; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu / %llu [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->new_sector, __entry->comm) +); + +TRACE_EVENT(block_remap, + + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from), + + TP_ARGS(q, bio, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + __entry->old_sector) +); + +#endif /* _TRACE_BLOCK_H */ + +/* This part must be outside protection */ +#include + diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 06b85850fab..844164dca90 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o -obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +ifeq ($(CONFIG_BLOCK),y) +obj-$(CONFIG_EVENT_TRACING) += blktrace.o +endif obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e3abf55bc8e..7bd6a9893c2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -23,10 +23,14 @@ #include #include #include -#include #include + +#include + #include "trace_output.h" +#ifdef CONFIG_BLK_DEV_IO_TRACE + static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; @@ -1658,3 +1662,75 @@ int blk_trace_init_sysfs(struct device *dev) return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#ifdef CONFIG_EVENT_TRACING + +void blk_dump_cmd(char *buf, struct request *rq) +{ + int i, end; + int len = rq->cmd_len; + unsigned char *cmd = rq->cmd; + + if (!blk_pc_request(rq)) { + buf[0] = '\0'; + return; + } + + for (end = len - 1; end >= 0; end--) + if (cmd[end]) + break; + end++; + + for (i = 0; i < len; i++) { + buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); + if (i == end && end != len - 1) { + sprintf(buf, " .."); + break; + } + } +} + +void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) +{ + int i = 0; + + if (rw & WRITE) + rwbs[i++] = 'W'; + else if (rw & 1 << BIO_RW_DISCARD) + rwbs[i++] = 'D'; + else if (bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; + + if (rw & 1 << BIO_RW_AHEAD) + rwbs[i++] = 'A'; + if (rw & 1 << BIO_RW_BARRIER) + rwbs[i++] = 'B'; + if (rw & 1 << BIO_RW_SYNCIO) + rwbs[i++] = 'S'; + if (rw & 1 << BIO_RW_META) + rwbs[i++] = 'M'; + + rwbs[i] = '\0'; +} + +void blk_fill_rwbs_rq(char *rwbs, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + int bytes; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) + bytes = rq->data_len; + else + bytes = rq->hard_nr_sectors << 9; + + blk_fill_rwbs(rwbs, rw, bytes); +} + +#endif /* CONFIG_EVENT_TRACING */ + diff --git a/mm/bounce.c b/mm/bounce.c index e590272fe7a..65f5e17e411 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -14,16 +14,15 @@ #include #include #include -#include #include +#include + #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 static mempool_t *page_pool, *isa_page_pool; -DEFINE_TRACE(block_bio_bounce); - #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { -- cgit v1.2.3-70-g09d2 From 80a538e49daddbf3bf783f3464e91bd3181957b2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 8 Jun 2009 14:40:20 +0800 Subject: drm/i915: Enable probe on new chipset Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- include/drm/drm_pciids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index fc55db78019..8b4c80cdd27 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -536,4 +536,6 @@ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} -- cgit v1.2.3-70-g09d2 From 6556d1df88fe68f9836beeb43342a336691cb67c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Jun 2009 14:04:26 -0400 Subject: tracing: fix the block trace points print size The sector field is either u64 or unsigned long depending on the arch. This patch casts the sector to unsigned long long to prevent the printf warnings. [ Impact: remove compile warnings ] Signed-off-by: Steven Rostedt --- include/trace/events/block.h | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index a99d1e565bb..53effd496a5 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -37,7 +37,8 @@ TRACE_EVENT(block_rq_abort, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_rq_insert, @@ -71,7 +72,8 @@ TRACE_EVENT(block_rq_insert, TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_rq_issue, @@ -105,7 +107,8 @@ TRACE_EVENT(block_rq_issue, TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_rq_requeue, @@ -137,7 +140,8 @@ TRACE_EVENT(block_rq_requeue, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_rq_complete, @@ -169,7 +173,8 @@ TRACE_EVENT(block_rq_complete, TP_printk("%d,%d %s (%s) %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - __entry->sector, __entry->nr_sector, __entry->errors) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); TRACE_EVENT(block_bio_bounce, @@ -195,7 +200,8 @@ TRACE_EVENT(block_bio_bounce, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_complete, @@ -221,7 +227,8 @@ TRACE_EVENT(block_bio_complete, TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->error) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->error) ); TRACE_EVENT(block_bio_backmerge, @@ -248,7 +255,8 @@ TRACE_EVENT(block_bio_backmerge, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_frontmerge, @@ -275,7 +283,8 @@ TRACE_EVENT(block_bio_frontmerge, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_bio_queue, @@ -302,7 +311,8 @@ TRACE_EVENT(block_bio_queue, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_getrq, @@ -330,7 +340,8 @@ TRACE_EVENT(block_getrq, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_sleeprq, @@ -358,7 +369,8 @@ TRACE_EVENT(block_sleeprq, TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); TRACE_EVENT(block_plug, @@ -441,7 +453,9 @@ TRACE_EVENT(block_split, TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->new_sector, __entry->comm) + (unsigned long long)__entry->sector, + (unsigned long long)__entry->new_sector, + __entry->comm) ); TRACE_EVENT(block_remap, @@ -471,9 +485,10 @@ TRACE_EVENT(block_remap, TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - __entry->sector, __entry->nr_sector, + (unsigned long long)__entry->sector, + __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), - __entry->old_sector) + (unsigned long long)__entry->old_sector) ); #endif /* _TRACE_BLOCK_H */ -- cgit v1.2.3-70-g09d2 From 725c624a58a10ef90a2ff889e122158fabf36147 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Jun 2009 19:09:45 -0400 Subject: tracing: add trace_seq_vprint interface The code to update the print formats for events requires a vprintf format in the trace_seq. This patch adds that interface. Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 ++ kernel/trace/trace_output.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index ba9627f00d3..c68bccba207 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -27,6 +27,8 @@ trace_seq_init(struct trace_seq *s) #ifdef CONFIG_TRACING extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); extern int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 425725c1622..c05aff465dc 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -100,6 +100,38 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) } EXPORT_SYMBOL_GPL(trace_seq_printf); +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (!len) + return 0; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) + return 0; + + s->len += ret; + + return len; +} +EXPORT_SYMBOL_GPL(trace_seq_vprintf); + int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { int len = (PAGE_SIZE - 1) - s->len; -- cgit v1.2.3-70-g09d2 From fd6c3a8dc44329d3aff9a578b5120982f63711ee Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 10:58:33 +0000 Subject: initconst adjustments - add .init.rodata to INIT_DATA, and group all initconst flavors together - move strings generated from __setup_param() into .init.rodata - add .*init.rodata to modpost's sets of init sections - make modpost warn about references between meminit and cpuinit as well as memexit and cpuexit sections (as CPU and memory hotplug are independently selectable features) Signed-off-by: Jan Beulich Signed-off-by: Sam Ravnborg --- include/asm-generic/vmlinux.lds.h | 5 ++-- include/linux/init.h | 3 ++- scripts/mod/modpost.c | 48 +++++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 89853bcd27a..3edb1149974 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -336,10 +336,11 @@ #define INIT_DATA \ *(.init.data) \ DEV_DISCARD(init.data) \ - DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.data) \ - CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.data) \ + *(.init.rodata) \ + DEV_DISCARD(init.rodata) \ + CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) #define INIT_TEXT \ diff --git a/include/linux/init.h b/include/linux/init.h index 0e06c176f18..9f70c9f25d4 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -225,7 +225,8 @@ struct obs_kernel_param { * obs_kernel_param "array" too far apart in .init.setup. */ #define __setup_param(str, unique_id, fn, early) \ - static char __setup_str_##unique_id[] __initdata __aligned(1) = str; \ + static const char __setup_str_##unique_id[] __initconst \ + __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ __used __section(.init.setup) \ __attribute__((aligned((sizeof(long))))) \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 161b7846733..94e71efb2c1 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -763,6 +763,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_DATA_SECTIONS \ + ".init.setup$", ".init.rodata$", \ + ".devinit.rodata$", ".cpuinit.rodata$", ".meminit.rodata$" \ ".init.data$", ".devinit.data$", ".cpuinit.data$", ".meminit.data$" #define ALL_EXIT_DATA_SECTIONS \ ".exit.data$", ".devexit.data$", ".cpuexit.data$", ".memexit.data$" @@ -772,21 +774,23 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_EXIT_TEXT_SECTIONS \ ".exit.text$", ".devexit.text$", ".cpuexit.text$", ".memexit.text$" -#define ALL_INIT_SECTIONS ALL_INIT_DATA_SECTIONS, ALL_INIT_TEXT_SECTIONS -#define ALL_EXIT_SECTIONS ALL_EXIT_DATA_SECTIONS, ALL_EXIT_TEXT_SECTIONS +#define ALL_INIT_SECTIONS INIT_SECTIONS, DEV_INIT_SECTIONS, \ + CPU_INIT_SECTIONS, MEM_INIT_SECTIONS +#define ALL_EXIT_SECTIONS EXIT_SECTIONS, DEV_EXIT_SECTIONS, \ + CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS #define DATA_SECTIONS ".data$", ".data.rel$" #define TEXT_SECTIONS ".text$" -#define INIT_SECTIONS ".init.data$", ".init.text$" -#define DEV_INIT_SECTIONS ".devinit.data$", ".devinit.text$" -#define CPU_INIT_SECTIONS ".cpuinit.data$", ".cpuinit.text$" -#define MEM_INIT_SECTIONS ".meminit.data$", ".meminit.text$" +#define INIT_SECTIONS ".init.*" +#define DEV_INIT_SECTIONS ".devinit.*" +#define CPU_INIT_SECTIONS ".cpuinit.*" +#define MEM_INIT_SECTIONS ".meminit.*" -#define EXIT_SECTIONS ".exit.data$", ".exit.text$" -#define DEV_EXIT_SECTIONS ".devexit.data$", ".devexit.text$" -#define CPU_EXIT_SECTIONS ".cpuexit.data$", ".cpuexit.text$" -#define MEM_EXIT_SECTIONS ".memexit.data$", ".memexit.text$" +#define EXIT_SECTIONS ".exit.*" +#define DEV_EXIT_SECTIONS ".devexit.*" +#define CPU_EXIT_SECTIONS ".cpuexit.*" +#define MEM_EXIT_SECTIONS ".memexit.*" /* init data sections */ static const char *init_data_sections[] = { ALL_INIT_DATA_SECTIONS, NULL }; @@ -869,12 +873,36 @@ const struct sectioncheck sectioncheck[] = { .tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_INIT, }, +/* Do not reference cpuinit code/data from meminit code/data */ +{ + .fromsec = { MEM_INIT_SECTIONS, NULL }, + .tosec = { CPU_INIT_SECTIONS, NULL }, + .mismatch = XXXINIT_TO_INIT, +}, +/* Do not reference meminit code/data from cpuinit code/data */ +{ + .fromsec = { CPU_INIT_SECTIONS, NULL }, + .tosec = { MEM_INIT_SECTIONS, NULL }, + .mismatch = XXXINIT_TO_INIT, +}, /* Do not reference exit code/data from devexit/cpuexit/memexit code/data */ { .fromsec = { DEV_EXIT_SECTIONS, CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS, NULL }, .tosec = { EXIT_SECTIONS, NULL }, .mismatch = XXXEXIT_TO_EXIT, }, +/* Do not reference cpuexit code/data from memexit code/data */ +{ + .fromsec = { MEM_EXIT_SECTIONS, NULL }, + .tosec = { CPU_EXIT_SECTIONS, NULL }, + .mismatch = XXXEXIT_TO_EXIT, +}, +/* Do not reference memexit code/data from cpuexit code/data */ +{ + .fromsec = { CPU_EXIT_SECTIONS, NULL }, + .tosec = { MEM_EXIT_SECTIONS, NULL }, + .mismatch = XXXEXIT_TO_EXIT, +}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, -- cgit v1.2.3-70-g09d2 From ef53dae8658cf0e93d380983824a661067948d87 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 7 Jun 2009 20:46:37 +0200 Subject: Improve vmlinux.lds.h support for arch specific linker scripts To support alingment of the individual architecture specific linker scripts provide a set of general definitions in vmlinux.lds.h With these definitions applied the diverse linekr scripts can be reduced in line count and their readability are improved - IMO. A sample linker script is included to give the preferred order of the sections for the architectures that do not have any special requirments. These definitions are also a first step towards eventual support for -ffunction-sections. The definitions makes it much easier to do a global renaming of section names - but the main purpose is to clean up the linker scripts. Tim Aboot has provided a lot of inputs to improve the definitions - all faults are mine. Signed-off-by: Sam Ravnborg Cc: Tim Abbott --- include/asm-generic/vmlinux.lds.h | 231 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3edb1149974..fba42236e94 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1,4 +1,58 @@ -#include +/* + * Helper macros to support writing architecture specific + * linker scripts. + * + * A minimal linker scripts has following content: + * [This is a sample, architectures may have special requiriements] + * + * OUTPUT_FORMAT(...) + * OUTPUT_ARCH(...) + * ENTRY(...) + * SECTIONS + * { + * . = START; + * __init_begin = .; + * HEAD_SECTION + * INIT_TEXT_SECTION(PAGE_SIZE) + * INIT_DATA_SECTION(...) + * PERCPU(PAGE_SIZE) + * __init_end = .; + * + * _stext = .; + * TEXT_SECTION = 0 + * _etext = .; + * + * _sdata = .; + * RO_DATA_SECTION(PAGE_SIZE) + * RW_DATA_SECTION(...) + * _edata = .; + * + * EXCEPTION_TABLE(...) + * NOTES + * + * __bss_start = .; + * BSS_SECTION(0, 0) + * __bss_stop = .; + * _end = .; + * + * /DISCARD/ : { + * EXIT_TEXT + * EXIT_DATA + * *(.exitcall.exit) + * } + * STABS_DEBUG + * DWARF_DEBUG + * } + * + * [__init_begin, __init_end] is the init section that may be freed after init + * [_stext, _etext] is the text section + * [_sdata, _edata] is the data section + * + * Some of the included output section have their own set of constants. + * Examples are: [__initramfs_start, __initramfs_end] for initramfs and + * [__nosave_begin, __nosave_end] for the nosave data + */ + #include #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 @@ -116,7 +170,36 @@ FTRACE_EVENTS() \ TRACE_SYSCALLS() -#define RO_DATA(align) \ +/* + * Data section helpers + */ +#define NOSAVE_DATA \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__nosave_begin) = .; \ + *(.data.nosave) \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__nosave_end) = .; + +#define PAGE_ALIGNED_DATA(page_align) \ + . = ALIGN(page_align); \ + *(.data.page_aligned) + +#define READ_MOSTLY_DATA(align) \ + . = ALIGN(align); \ + *(.data.read_mostly) + +#define CACHELINE_ALIGNED_DATA(align) \ + . = ALIGN(align); \ + *(.data.cacheline_aligned) + +#define INIT_TASK(align) \ + . = ALIGN(align); \ + *(.data.init_task) + +/* + * Read only Data + */ +#define RO_DATA_SECTION(align) \ . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_rodata) = .; \ @@ -270,9 +353,10 @@ } \ . = ALIGN((align)); -/* RODATA provided for backward compatibility. +/* RODATA & RO_DATA provided for backward compatibility. * All archs are supposed to use RO_DATA() */ -#define RODATA RO_DATA(4096) +#define RODATA RO_DATA_SECTION(4096) +#define RO_DATA(align) RO_DATA_SECTION(align) #define SECURITY_INIT \ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \ @@ -332,6 +416,31 @@ /* Section used for early init (in .S files) */ #define HEAD_TEXT *(HEAD_TEXT_SECTION) +#define HEAD_SECTION \ + .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ + HEAD_TEXT \ + } + +/* + * Exception table + */ +#define EXCEPTION_TABLE(align) \ + . = ALIGN(align); \ + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___ex_table) = .; \ + *(__ex_table) \ + VMLINUX_SYMBOL(__stop___ex_table) = .; \ + } + +/* + * Init task + */ +#define INIT_TASK_DATA(align) \ + . = ALIGN(align); \ + .data.init_task : { \ + INIT_TASK \ + } + /* init and exit section handling */ #define INIT_DATA \ *(.init.data) \ @@ -364,9 +473,32 @@ CPU_DISCARD(exit.text) \ MEM_DISCARD(exit.text) - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to - the beginning of the section so we begin them at 0. */ +/* + * bss (Block Started by Symbol) - uninitialized data + * zeroed during startup + */ +#define SBSS \ + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \ + *(.sbss) \ + *(.scommon) \ + } + +#define BSS(bss_align) \ + . = ALIGN(bss_align); \ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__bss_start) = .; \ + *(.bss.page_aligned) \ + *(.dynbss) \ + *(.bss) \ + *(COMMON) \ + VMLINUX_SYMBOL(__bss_stop) = .; \ + } + +/* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to + * the beginning of the section so we begin them at 0. + */ #define DWARF_DEBUG \ /* DWARF 1 */ \ .debug 0 : { *(.debug) } \ @@ -433,6 +565,12 @@ VMLINUX_SYMBOL(__stop_notes) = .; \ } +#define INIT_SETUP(initsetup_align) \ + . = ALIGN(initsetup_align); \ + VMLINUX_SYMBOL(__setup_start) = .; \ + *(.init.setup) \ + VMLINUX_SYMBOL(__setup_end) = .; + #define INITCALLS \ *(.initcallearly.init) \ VMLINUX_SYMBOL(__early_initcall_end) = .; \ @@ -454,6 +592,31 @@ *(.initcall7.init) \ *(.initcall7s.init) +#define INIT_CALLS \ + VMLINUX_SYMBOL(__initcall_start) = .; \ + INITCALLS \ + VMLINUX_SYMBOL(__initcall_end) = .; + +#define CON_INITCALL \ + VMLINUX_SYMBOL(__con_initcall_start) = .; \ + *(.con_initcall.init) \ + VMLINUX_SYMBOL(__con_initcall_end) = .; + +#define SECURITY_INITCALL \ + VMLINUX_SYMBOL(__security_initcall_start) = .; \ + *(.security_initcall.init) \ + VMLINUX_SYMBOL(__security_initcall_end) = .; + +#ifdef CONFIG_BLK_DEV_INITRD +#define INIT_RAM_FS \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__initramfs_start) = .; \ + *(.init.ramfs) \ + VMLINUX_SYMBOL(__initramfs_end) = .; +#else +#define INITRAMFS +#endif + /** * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) @@ -510,3 +673,57 @@ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } + + +/* + * Definition of the high level *_SECTION macros + * They will fit only a subset of the architectures + */ + + +/* + * Writeable data. + * All sections are combined in a single .data section. + * The sections following CONSTRUCTORS are arranged so their + * typical alignment matches. + * A cacheline is typical/always less than a PAGE_SIZE so + * the sections that has this restriction (or similar) + * is located before the ones requiring PAGE_SIZE alignment. + * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which + * matches the requirment of PAGE_ALIGNED_DATA. + * +/* use 0 as page_align if page_aligned data is not used */ +#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask) \ + . = ALIGN(PAGE_SIZE); \ + .data : AT(ADDR(.data) - LOAD_OFFSET) { \ + INIT_TASK(inittask) \ + CACHELINE_ALIGNED_DATA(cacheline) \ + READ_MOSTLY_DATA(cacheline) \ + DATA_DATA \ + CONSTRUCTORS \ + NOSAVE_DATA(nosave) \ + PAGE_ALIGNED_DATA(pagealigned) \ + } + +#define INIT_TEXT_SECTION(inittext_align) \ + . = ALIGN(inittext_align); \ + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(_sinittext) = .; \ + INIT_TEXT \ + VMLINUX_SYMBOL(_einittext) = .; \ + } + +#define INIT_DATA_SECTION(initsetup_align) \ + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { \ + INIT_DATA \ + INIT_SETUP(initsetup_align) \ + INIT_CALLS \ + CON_INITCALL \ + SECURITY_INITCALL \ + INIT_RAM_FS \ + } + +#define BSS_SECTION(sbss_align, bss_align) \ + SBSS \ + BSS(bss_align) \ + . = ALIGN(4); \ -- cgit v1.2.3-70-g09d2 From 1506e30b5f25f6c3357167a18f0e4ae6f5662a28 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2009 17:49:06 -0700 Subject: rfkill: include err.h Since we use ERR_PTR and similar macros, we need to include linux/err.h. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index ee3eddea856..d7e818ad0bc 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -105,6 +105,7 @@ enum rfkill_user_states { #include #include #include +#include /* this is opaque */ struct rfkill; -- cgit v1.2.3-70-g09d2 From cf9e4e15e8f6306b2559979269ead7c02e6b2b95 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 11 Feb 2009 16:03:36 +0800 Subject: KVM: Split IOAPIC structure Prepared for reuse ioapic_redir_entry for MSI. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_types.h | 17 +++++++++++++++++ virt/kvm/ioapic.c | 6 +++--- virt/kvm/ioapic.h | 17 +---------------- 3 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2b8318c83e5..b84aca3c4ad 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,4 +40,21 @@ typedef unsigned long hfn_t; typedef hfn_t pfn_t; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c3b99def9cb..812801317e3 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { - union ioapic_redir_entry *pent; + union kvm_ioapic_redirect_entry *pent; int injected = -1; pent = &ioapic->redirtbl[idx]; @@ -284,7 +284,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { u32 old_irr = ioapic->irr; u32 mask = 1 << irq; - union ioapic_redir_entry entry; + union kvm_ioapic_redirect_entry entry; int ret = 1; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { @@ -305,7 +305,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, int trigger_mode) { - union ioapic_redir_entry *ent; + union kvm_ioapic_redirect_entry *ent; ent = &ioapic->redirtbl[pin]; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index a34bd5e6436..008ec873d01 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -40,22 +40,7 @@ struct kvm_ioapic { u32 id; u32 irr; u32 pad; - union ioapic_redir_entry { - u64 bits; - struct { - u8 vector; - u8 delivery_mode:3; - u8 dest_mode:1; - u8 delivery_status:1; - u8 polarity:1; - u8 remote_irr:1; - u8 trig_mode:1; - u8 mask:1; - u8 reserve:7; - u8 reserved[4]; - u8 dest_id; - } fields; - } redirtbl[IOAPIC_NUM_PINS]; + union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); -- cgit v1.2.3-70-g09d2 From 116191b69b608d0f1513e3abe71d6a46800f2bd6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 11 Feb 2009 16:03:37 +0800 Subject: KVM: Unify the delivery of IOAPIC and MSI interrupts Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 ++ virt/kvm/ioapic.c | 91 +++++++++++++++++----------------------------- virt/kvm/irq_comm.c | 95 +++++++++++++++++++++++++++++------------------- 3 files changed, 95 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 894a56e365e..1a2f98fbece 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -352,6 +352,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); +void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, + union kvm_ioapic_redirect_entry *entry, + unsigned long *deliver_bitmask); int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 812801317e3..883fd0dc9b7 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -203,79 +203,56 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) { - u8 dest = ioapic->redirtbl[irq].fields.dest_id; - u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; - u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; - u8 vector = ioapic->redirtbl[irq].fields.vector; - u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; - u32 deliver_bitmask; + union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq]; + unsigned long deliver_bitmask; struct kvm_vcpu *vcpu; int vcpu_id, r = -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", - dest, dest_mode, delivery_mode, vector, trig_mode); + entry.fields.dest, entry.fields.dest_mode, + entry.fields.delivery_mode, entry.fields.vector, + entry.fields.trig_mode); - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, - dest_mode); + kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask); if (!deliver_bitmask) { ioapic_debug("no target on destination\n"); return 0; } - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); + /* Always delivery PIT interrupt to vcpu 0 */ #ifdef CONFIG_X86 - if (irq == 0) - vcpu = ioapic->kvm->vcpus[0]; + if (irq == 0) + deliver_bitmask = 1; #endif - if (vcpu != NULL) - r = ioapic_inj_irq(ioapic, vcpu, vector, - trig_mode, delivery_mode); - else - ioapic_debug("null lowest prio vcpu: " - "mask=%x vector=%x delivery_mode=%x\n", - deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY); - break; - case IOAPIC_FIXED: -#ifdef CONFIG_X86 - if (irq == 0) - deliver_bitmask = 1; -#endif - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { + + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) { + if (entry.fields.delivery_mode == + IOAPIC_LOWEST_PRIORITY || + entry.fields.delivery_mode == IOAPIC_FIXED) { if (r < 0) r = 0; - r += ioapic_inj_irq(ioapic, vcpu, vector, - trig_mode, delivery_mode); - } - } - break; - case IOAPIC_NMI: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - ioapic_inj_nmi(vcpu); + r += ioapic_inj_irq(ioapic, vcpu, + entry.fields.vector, + entry.fields.trig_mode, + entry.fields.delivery_mode); + } else if (entry.fields.delivery_mode == IOAPIC_NMI) { r = 1; - } - else - ioapic_debug("NMI to vcpu %d failed\n", - vcpu->vcpu_id); - } - break; - default: - printk(KERN_WARNING "Unsupported delivery mode %d\n", - delivery_mode); - break; + ioapic_inj_nmi(vcpu); + } else + ioapic_debug("unsupported delivery mode %x!\n", + entry.fields.delivery_mode); + } else + ioapic_debug("null destination vcpu: " + "mask=%x vector=%x delivery_mode=%x\n", + entry.fields.deliver_bitmask, + entry.fields.vector, + entry.fields.delivery_mode); } return r; } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 864ac5483ba..aec7a0d93a3 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -43,53 +43,74 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } +void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, + union kvm_ioapic_redirect_entry *entry, + unsigned long *deliver_bitmask) +{ + struct kvm_vcpu *vcpu; + + *deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, + entry->fields.dest_id, entry->fields.dest_mode); + switch (entry->fields.delivery_mode) { + case IOAPIC_LOWEST_PRIORITY: + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, + entry->fields.vector, *deliver_bitmask); + *deliver_bitmask = 1 << vcpu->vcpu_id; + break; + case IOAPIC_FIXED: + case IOAPIC_NMI: + break; + default: + if (printk_ratelimit()) + printk(KERN_INFO "kvm: unsupported delivery mode %d\n", + entry->fields.delivery_mode); + *deliver_bitmask = 0; + } +} + static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level) { int vcpu_id, r = -1; struct kvm_vcpu *vcpu; struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&e->msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&e->msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&e->msi.data); - u32 deliver_bitmask; + union kvm_ioapic_redirect_entry entry; + unsigned long deliver_bitmask; BUG_ON(!ioapic); - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - r = kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, vector, trig_mode); - } + entry.bits = 0; + entry.fields.dest_id = (e->msi.address_lo & + MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + entry.fields.vector = (e->msi.data & + MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, + (unsigned long *)&e->msi.address_lo); + entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, + (unsigned long *)&e->msi.data); + entry.fields.delivery_mode = test_bit( + MSI_DATA_DELIVERY_MODE_SHIFT, + (unsigned long *)&e->msi.data); + + /* TODO Deal with RH bit of MSI message address */ + + kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask); + + if (!deliver_bitmask) { + printk(KERN_WARNING "kvm: no destination for MSI delivery!"); + return -1; + } + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, entry.fields.vector, + entry.fields.trig_mode); } - break; - default: - break; } return r; } -- cgit v1.2.3-70-g09d2 From c1e01514296e8a4a43ff0c88dcff635cb90feb5f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:26 +0800 Subject: KVM: Ioctls for init MSI-X entry Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls. This two ioctls are used by userspace to specific guest device MSI-X entry number and correlate MSI-X entry with GSI during the initialization stage. MSI-X should be well initialzed before enabling. Don't support change MSI-X entry number for now. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 18 ++++++++ include/linux/kvm_host.h | 10 +++++ virt/kvm/kvm_main.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8cc137911b3..78cdee8c635 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -487,6 +487,10 @@ struct kvm_irq_routing { #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR \ + _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY \ + _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) /* * ioctls for vcpu fds @@ -607,4 +611,18 @@ struct kvm_assigned_irq { #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 512 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1a2f98fbece..432edc27e82 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -319,6 +319,12 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +struct kvm_guest_msix_entry { + u32 vector; + u16 entry; + u16 flags; +}; + struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; struct work_struct interrupt_work; @@ -326,13 +332,17 @@ struct kvm_assigned_dev_kernel { int assigned_dev_id; int host_busnr; int host_devfn; + unsigned int entries_nr; int host_irq; bool host_irq_disabled; + struct msix_entry *host_msix_entries; int guest_irq; + struct kvm_guest_msix_entry *guest_msix_entries; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) +#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10)) unsigned long irq_requested_type; int irq_source_id; int flags; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d0dd390aa5..1ceb96901f3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1593,6 +1593,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = kzalloc( + sizeof(struct kvm_guest_msix_entry) * + entry_nr->entry_nr, GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1917,7 +1999,29 @@ static long kvm_vm_ioctl(struct file *filp, vfree(entries); break; } +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) + goto out; + break; + } #endif +#endif /* KVM_CAP_IRQ_ROUTING */ default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } -- cgit v1.2.3-70-g09d2 From 2350bd1f62c8706c22b8e58c3bfff10806c0a31b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:27 +0800 Subject: KVM: Add MSI-X interrupt injection logic We have to handle more than one interrupt with one handler for MSI-X. Avi suggested to use a flag to indicate the pending. So here is it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 66 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 432edc27e82..3832243625d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -319,6 +319,7 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +#define KVM_ASSIGNED_MSIX_PENDING 0x1 struct kvm_guest_msix_entry { u32 vector; u16 entry; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1ceb96901f3..8bd44d6985c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -95,25 +95,69 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h return NULL; } +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) { + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + return 0; + } + + return index; +} + static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) { struct kvm_assigned_dev_kernel *assigned_dev; + struct kvm *kvm; + int irq, i; assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, interrupt_work); + kvm = assigned_dev->kvm; /* This is taken to safely inject irq inside the guest. When * the interrupt injection (or the ioapic code) uses a * finer-grained lock, update this */ - mutex_lock(&assigned_dev->kvm->lock); - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { - enable_irq(assigned_dev->host_irq); - assigned_dev->host_irq_disabled = false; + mutex_lock(&kvm->lock); + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { + struct kvm_guest_msix_entry *guest_entries = + assigned_dev->guest_msix_entries; + for (i = 0; i < assigned_dev->entries_nr; i++) { + if (!(guest_entries[i].flags & + KVM_ASSIGNED_MSIX_PENDING)) + continue; + guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + guest_entries[i].vector, 1); + irq = assigned_dev->host_msix_entries[i].vector; + if (irq != 0) + enable_irq(irq); + assigned_dev->host_irq_disabled = false; + } + } else { + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + if (assigned_dev->irq_requested_type & + KVM_ASSIGNED_DEV_GUEST_MSI) { + enable_irq(assigned_dev->host_irq); + assigned_dev->host_irq_disabled = false; + } } + mutex_unlock(&assigned_dev->kvm->lock); } @@ -122,6 +166,14 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; + if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) { + int index = find_index_from_host_irq(assigned_dev, irq); + if (index < 0) + return IRQ_HANDLED; + assigned_dev->guest_msix_entries[index].flags |= + KVM_ASSIGNED_MSIX_PENDING; + } + schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); -- cgit v1.2.3-70-g09d2 From d510d6cc653bc4b3094ea73afe12600d0ab445b3 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:28 +0800 Subject: KVM: Enable MSI-X for KVM assigned device This patch finally enable MSI-X. What we need for MSI-X: 1. Intercept one page in MMIO region of device. So that we can get guest desired MSI-X table and set up the real one. Now this have been done by guest, and transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY. 2. Information for incoming interrupt. Now one device can have more than one interrupt, and they are all handled by one workqueue structure. So we need to identify them. The previous patch enable gsi_msg_pending_bitmap get this done. 3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X message address/data. We used same entry number for the host and guest here, so that it's easy to find the correlated guest gsi. What we lack for now: 1. The PCI spec said nothing can existed with MSI-X table in the same page of MMIO region, except pending bits. The patch ignore pending bits as the first step (so they are always 0 - no pending). 2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch didn't support this, and Linux also don't work in this way. 3. The patch didn't implement MSI-X mask all and mask single entry. I would implement the former in driver/pci/msi.c later. And for single entry, userspace should have reposibility to handle it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + include/linux/kvm.h | 8 ++++ virt/kvm/kvm_main.c | 98 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index dc3f6cf1170..125be8b1956 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -16,6 +16,7 @@ #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_MSIX /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 78cdee8c635..640835ed270 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,9 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +#ifdef __KVM_HAVE_MSIX +#define KVM_CAP_DEVICE_MSIX 28 +#endif /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -611,6 +614,11 @@ struct kvm_assigned_irq { #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\ + KVM_DEV_IRQ_ASSIGN_MASK_MSIX) +#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1) +#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2) + struct kvm_assigned_msix_nr { __u32 assigned_dev_id; __u16 entry_nr; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8bd44d6985c..3bed82754a5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -236,13 +236,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm, * now, the kvm state is still legal for probably we also have to wait * interrupt_work done. */ - disable_irq_nosync(assigned_dev->host_irq); - cancel_work_sync(&assigned_dev->interrupt_work); + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq_nosync(assigned_dev-> + host_msix_entries[i].vector); + + cancel_work_sync(&assigned_dev->interrupt_work); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + (void *)assigned_dev); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - pci_disable_msi(assigned_dev->dev); + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); + + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + if (assigned_dev->irq_requested_type & + KVM_ASSIGNED_DEV_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } assigned_dev->irq_requested_type = 0; } @@ -373,6 +393,60 @@ static int assigned_device_update_msi(struct kvm *kvm, } #endif +#ifdef __KVM_HAVE_MSIX +static int assigned_device_update_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *adev, + struct kvm_assigned_irq *airq) +{ + /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ + int i, r; + + adev->ack_notifier.gsi = -1; + + if (irqchip_in_kernel(kvm)) { + if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) + return -ENOTTY; + + if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) { + /* Guest disable MSI-X */ + kvm_free_assigned_irq(kvm, adev); + if (msi2intx) { + pci_enable_msi(adev->dev); + if (adev->dev->msi_enabled) + return assigned_device_update_msi(kvm, + adev, airq); + } + return assigned_device_update_intx(kvm, adev, airq); + } + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (adev->entries_nr == 0) + return -EINVAL; + + kvm_free_assigned_irq(kvm, adev); + + r = pci_enable_msix(adev->dev, adev->host_msix_entries, + adev->entries_nr); + if (r) + return r; + + for (i = 0; i < adev->entries_nr; i++) { + r = request_irq((adev->host_msix_entries + i)->vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)adev); + if (r) + return r; + } + } + + adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX; + + return 0; +} +#endif + static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_irq *assigned_irq) @@ -417,12 +491,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, } } - if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && + if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) + current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX; + else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; changed_flags = assigned_irq->flags ^ current_flags; +#ifdef __KVM_HAVE_MSIX + if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) { + r = assigned_device_update_msix(kvm, match, assigned_irq); + if (r) { + printk(KERN_WARNING "kvm: failed to execute " + "MSI-X action!\n"); + goto out_release; + } + } else +#endif if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || (msi2intx && match->dev->msi_enabled)) { #ifdef CONFIG_X86 -- cgit v1.2.3-70-g09d2 From b95b51d580bff9376850eef29d34c3aa08c26db7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 26 Feb 2009 13:55:33 +0100 Subject: KVM: declare ioapic functions only on affected hardware Since "KVM: Unify the delivery of IOAPIC and MSI interrupts" I get the following warnings: CC [M] arch/s390/kvm/kvm-s390.o In file included from arch/s390/kvm/kvm-s390.c:22: include/linux/kvm_host.h:357: warning: 'struct kvm_ioapic' declared inside parameter list include/linux/kvm_host.h:357: warning: its scope is only this definition or declaration, which is probably not what you want This patch limits IOAPIC functions for architectures that have one. Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3832243625d..3b91ec9982c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -363,9 +363,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); +#ifdef __KVM_HAVE_IOAPIC void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); +#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, -- cgit v1.2.3-70-g09d2 From a53c17d21c46a752f5ac6695376481bc27865b04 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:34:49 +0200 Subject: KVM: ioapic/msi interrupt delivery consolidation ioapic_deliver() and kvm_set_msi() have code duplication. Move the code into ioapic_deliver_entry() function and call it from both places. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 2 +- virt/kvm/ioapic.c | 61 +++++++++++++++++++++++++----------------------- virt/kvm/ioapic.h | 4 ++-- virt/kvm/irq_comm.c | 32 +++---------------------- 4 files changed, 38 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b91ec9982c..ec9d078b1e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -364,7 +364,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); #ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index d4a7948b010..b71c0442cec 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -142,54 +142,57 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e) { - union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq]; DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); - struct kvm_vcpu *vcpu; - int vcpu_id, r = -1; + int i, r = -1; - ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " - "vector=%x trig_mode=%x\n", - entry.fields.dest, entry.fields.dest_mode, - entry.fields.delivery_mode, entry.fields.vector, - entry.fields.trig_mode); - - /* Always delivery PIT interrupt to vcpu 0 */ -#ifdef CONFIG_X86 - if (irq == 0) { - bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); - __set_bit(0, deliver_bitmask); - } else -#endif - kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask); + kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask); if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) { ioapic_debug("no target on destination\n"); - return 0; + return r; } - while ((vcpu_id = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS)) + while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS)) < KVM_MAX_VCPUS) { - __clear_bit(vcpu_id, deliver_bitmask); - vcpu = ioapic->kvm->vcpus[vcpu_id]; + struct kvm_vcpu *vcpu = kvm->vcpus[i]; + __clear_bit(i, deliver_bitmask); if (vcpu) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, - entry.fields.vector, - entry.fields.trig_mode, - entry.fields.delivery_mode); + r += kvm_apic_set_irq(vcpu, e->fields.vector, + e->fields.delivery_mode, + e->fields.trig_mode); } else ioapic_debug("null destination vcpu: " "mask=%x vector=%x delivery_mode=%x\n", - entry.fields.deliver_bitmask, - entry.fields.vector, - entry.fields.delivery_mode); + e->fields.deliver_bitmask, + e->fields.vector, e->fields.delivery_mode); } return r; } +static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +{ + union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq]; + + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " + "vector=%x trig_mode=%x\n", + entry.fields.dest, entry.fields.dest_mode, + entry.fields.delivery_mode, entry.fields.vector, + entry.fields.trig_mode); + +#ifdef CONFIG_X86 + /* Always delivery PIT interrupt to vcpu 0 */ + if (irq == 0) { + entry.fields.dest_mode = 0; /* Physical mode. */ + entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; + } +#endif + return ioapic_deliver_entry(ioapic->kvm, &entry); +} + int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { u32 old_irr = ioapic->irr; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index c8032ab2a4e..bedeea59cc1 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -70,8 +70,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); - +int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 325c6685f20..35397a569b2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -43,12 +43,11 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask) { int i; - struct kvm *kvm = ioapic->kvm; struct kvm_vcpu *vcpu; bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); @@ -90,7 +89,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, switch (entry->fields.delivery_mode) { case IOAPIC_LOWEST_PRIORITY: /* Select one in deliver_bitmask */ - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, + vcpu = kvm_get_lowest_prio_vcpu(kvm, entry->fields.vector, deliver_bitmask); bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); if (!vcpu) @@ -111,13 +110,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level) { - int vcpu_id, r = -1; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); union kvm_ioapic_redirect_entry entry; - DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); - - BUG_ON(!ioapic); entry.bits = 0; entry.fields.dest_id = (e->msi.address_lo & @@ -133,26 +126,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, (unsigned long *)&e->msi.data); /* TODO Deal with RH bit of MSI message address */ - - kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask); - - if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) { - printk(KERN_WARNING "kvm: no destination for MSI delivery!"); - return -1; - } - while ((vcpu_id = find_first_bit(deliver_bitmask, - KVM_MAX_VCPUS)) < KVM_MAX_VCPUS) { - __clear_bit(vcpu_id, deliver_bitmask); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, entry.fields.vector, - entry.fields.dest_mode, - entry.fields.trig_mode); - } - } - return r; + return ioapic_deliver_entry(kvm, &entry); } /* This should be called with the kvm->lock mutex held -- cgit v1.2.3-70-g09d2 From 343f94fe4d16ec898da77720c03da9e09f8523d2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:34:54 +0200 Subject: KVM: consolidate ioapic/ipi interrupt delivery logic Use kvm_apic_match_dest() in kvm_get_intr_delivery_bitmask() instead of duplicating the same code. Use kvm_get_intr_delivery_bitmask() in apic_send_ipi() to figure out ipi destination instead of reimplementing the logic. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 8 ++++++ arch/ia64/kvm/lapic.h | 3 ++ arch/x86/kvm/lapic.c | 69 +++++++++++++++++--------------------------- arch/x86/kvm/lapic.h | 2 ++ include/linux/kvm_host.h | 5 ---- virt/kvm/ioapic.c | 5 +++- virt/kvm/ioapic.h | 10 ++++--- virt/kvm/irq_comm.c | 74 ++++++++++++++---------------------------------- 8 files changed, 70 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 99d6d174d93..8eea9cba7b7 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1852,6 +1852,14 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, return lvcpu; } +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode) +{ + return (dest_mode == 0) ? + kvm_apic_match_physical_addr(target, dest) : + kvm_apic_match_logical_addr(target, dest); +} + static int find_highest_bits(int *dat) { u32 bits, bitnum; diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index cbcfaa6195c..31602e7338d 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -20,6 +20,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode); +bool kvm_apic_present(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig); #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a42f968a23e..998862a3c26 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -260,7 +260,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) { - return kvm_apic_id(apic) == dest; + return dest == 0xff || kvm_apic_id(apic) == dest; } int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) @@ -289,37 +289,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) return result; } -static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode) { int result = 0; struct kvm_lapic *target = vcpu->arch.apic; apic_debug("target %p, source %p, dest 0x%x, " - "dest_mode 0x%x, short_hand 0x%x", + "dest_mode 0x%x, short_hand 0x%x\n", target, source, dest, dest_mode, short_hand); ASSERT(!target); switch (short_hand) { case APIC_DEST_NOSHORT: - if (dest_mode == 0) { + if (dest_mode == 0) /* Physical mode. */ - if ((dest == 0xFF) || (dest == kvm_apic_id(target))) - result = 1; - } else + result = kvm_apic_match_physical_addr(target, dest); + else /* Logical mode. */ result = kvm_apic_match_logical_addr(target, dest); break; case APIC_DEST_SELF: - if (target == source) - result = 1; + result = (target == source); break; case APIC_DEST_ALLINC: result = 1; break; case APIC_DEST_ALLBUT: - if (target != source) - result = 1; + result = (target != source); break; default: printk(KERN_WARNING "Bad dest shorthand value %x\n", @@ -492,38 +489,26 @@ static void apic_send_ipi(struct kvm_lapic *apic) unsigned int delivery_mode = icr_low & APIC_MODE_MASK; unsigned int vector = icr_low & APIC_VECTOR_MASK; - struct kvm_vcpu *target; - struct kvm_vcpu *vcpu; - DECLARE_BITMAP(lpr_map, KVM_MAX_VCPUS); + DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); int i; - bitmap_zero(lpr_map, KVM_MAX_VCPUS); apic_debug("icr_high 0x%x, icr_low 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", icr_high, icr_low, short_hand, dest, trig_mode, level, dest_mode, delivery_mode, vector); - for (i = 0; i < KVM_MAX_VCPUS; i++) { - vcpu = apic->vcpu->kvm->vcpus[i]; - if (!vcpu) - continue; - - if (vcpu->arch.apic && - apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { - if (delivery_mode == APIC_DM_LOWEST) - __set_bit(vcpu->vcpu_id, lpr_map); - else - __apic_accept_irq(vcpu->arch.apic, delivery_mode, - vector, level, trig_mode); - } - } - - if (delivery_mode == APIC_DM_LOWEST) { - target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map); - if (target != NULL) - __apic_accept_irq(target->arch.apic, delivery_mode, - vector, level, trig_mode); + kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode, + delivery_mode == APIC_DM_LOWEST, short_hand, + deliver_bitmask); + + while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS)) + < KVM_MAX_VCPUS) { + struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i]; + __clear_bit(i, deliver_bitmask); + if (vcpu) + __apic_accept_irq(vcpu->arch.apic, delivery_mode, + vector, level, trig_mode); } } @@ -930,16 +915,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_reset); -int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +bool kvm_apic_present(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; - int ret = 0; - - if (!apic) - return 0; - ret = apic_enabled(apic); + return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); +} - return ret; +int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); } EXPORT_SYMBOL_GPL(kvm_lapic_enabled); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1b0e3c03cb3..b66dc14a969 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -37,6 +37,8 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); +bool kvm_apic_present(struct kvm_vcpu *vcpu); +bool kvm_lapic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec9d078b1e8..fb60f31c4fb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -363,11 +363,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); -#ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index b71c0442cec..43969bbf127 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -147,7 +147,10 @@ int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e) DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); int i, r = -1; - kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask); + kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id, + e->fields.dest_mode, + e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY, + 0, deliver_bitmask); if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) { ioapic_debug("no target on destination\n"); diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index bedeea59cc1..d996c7abc46 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -65,13 +65,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) } struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, - unsigned long *bitmap); + unsigned long *bitmap); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src, + int dest_id, int dest_mode, bool low_prio, int short_hand, + unsigned long *deliver_bitmask); int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 35397a569b2..e43701c0a5c 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -43,67 +43,35 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask) +void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src, + int dest_id, int dest_mode, bool low_prio, int short_hand, + unsigned long *deliver_bitmask) { int i; struct kvm_vcpu *vcpu; + if (dest_mode == 0 && dest_id == 0xff && low_prio) + printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); + for (i = 0; i < KVM_MAX_VCPUS; i++) { + vcpu = kvm->vcpus[i]; - if (entry->fields.dest_mode == 0) { /* Physical mode. */ - if (entry->fields.dest_id == 0xFF) { /* Broadcast. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) - if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic) - __set_bit(i, deliver_bitmask); - /* Lowest priority shouldn't combine with broadcast */ - if (entry->fields.delivery_mode == - IOAPIC_LOWEST_PRIORITY && printk_ratelimit()) - printk(KERN_INFO "kvm: apic: phys broadcast " - "and lowest prio\n"); - return; - } - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; - if (kvm_apic_match_physical_addr(vcpu->arch.apic, - entry->fields.dest_id)) { - if (vcpu->arch.apic) - __set_bit(i, deliver_bitmask); - break; - } - } - } else if (entry->fields.dest_id != 0) /* Logical mode, MDA non-zero. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; - if (vcpu->arch.apic && - kvm_apic_match_logical_addr(vcpu->arch.apic, - entry->fields.dest_id)) - __set_bit(i, deliver_bitmask); - } + if (!vcpu || !kvm_apic_present(vcpu)) + continue; - switch (entry->fields.delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - /* Select one in deliver_bitmask */ - vcpu = kvm_get_lowest_prio_vcpu(kvm, - entry->fields.vector, deliver_bitmask); - bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); - if (!vcpu) - return; - __set_bit(vcpu->vcpu_id, deliver_bitmask); - break; - case IOAPIC_FIXED: - case IOAPIC_NMI: - break; - default: - if (printk_ratelimit()) - printk(KERN_INFO "kvm: unsupported delivery mode %d\n", - entry->fields.delivery_mode); + if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id, + dest_mode)) + continue; + + __set_bit(i, deliver_bitmask); + } + + if (low_prio) { + vcpu = kvm_get_lowest_prio_vcpu(kvm, 0, deliver_bitmask); bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); + if (vcpu) + __set_bit(vcpu->vcpu_id, deliver_bitmask); } } -- cgit v1.2.3-70-g09d2 From 58c2dde17d6eb6c8c0566e52d184aa16755d890f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:35:04 +0200 Subject: KVM: APIC: get rid of deliver_bitmask Deliver interrupt during destination matching loop. Signed-off-by: Gleb Natapov Acked-by: Xiantao Zhang Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 33 ++++++++++++---------- arch/ia64/kvm/lapic.h | 4 +-- arch/x86/kvm/lapic.c | 59 +++++++++++---------------------------- arch/x86/kvm/lapic.h | 3 +- include/linux/kvm_types.h | 10 +++++++ virt/kvm/ioapic.c | 57 +++++++++++-------------------------- virt/kvm/ioapic.h | 6 ++-- virt/kvm/irq_comm.c | 71 ++++++++++++++++++++++++++++------------------- 8 files changed, 108 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 1887a93a2bd..acf43ec4270 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -283,6 +283,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } +static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vector, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + kvm_vcpu_kick(vcpu); + return 1; + } + return 0; +} + /* * offset: address offset to IPI space. * value: deliver value. @@ -292,20 +304,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, { switch (dm) { case SAPIC_FIXED: - kvm_apic_set_irq(vcpu, vector, dm, 0); break; case SAPIC_NMI: - kvm_apic_set_irq(vcpu, 2, dm, 0); + vector = 2; break; case SAPIC_EXTINT: - kvm_apic_set_irq(vcpu, 0, dm, 0); + vector = 0; break; case SAPIC_INIT: case SAPIC_PMI: default: printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); - break; + return; } + __apic_accept_irq(vcpu, vector); } static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, @@ -1813,17 +1825,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) put_cpu(); } -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { - - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (!test_and_set_bit(vec, &vpd->irr[0])) { - vcpu->arch.irq_new_pending = 1; - kvm_vcpu_kick(vcpu); - return 1; - } - return 0; + return __apic_accept_irq(vcpu, irq->vector); } int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) @@ -1844,6 +1848,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode) { + struct kvm_lapic *target = vcpu->arch.apic; return (dest_mode == 0) ? kvm_apic_match_physical_addr(target, dest) : kvm_apic_match_logical_addr(target, dest); diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index e42109e6ca4..ee541cebcd7 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -23,7 +23,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -bool kvm_apic_present(struct kvm_vcpu *vcpu); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +#define kvm_apic_present(x) (true) #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 814466f455d..dd934d27040 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -199,27 +199,12 @@ EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { struct kvm_lapic *apic = vcpu->arch.apic; - int lapic_dmode; - switch (dmode) { - case IOAPIC_LOWEST_PRIORITY: - lapic_dmode = APIC_DM_LOWEST; - break; - case IOAPIC_FIXED: - lapic_dmode = APIC_DM_FIXED; - break; - case IOAPIC_NMI: - lapic_dmode = APIC_DM_NMI; - break; - default: - printk(KERN_DEBUG"Ignoring delivery mode %d\n", dmode); - return 0; - break; - } - return __apic_accept_irq(apic, lapic_dmode, vec, 1, trig); + return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, + irq->level, irq->trig_mode); } static inline int apic_find_highest_isr(struct kvm_lapic *apic) @@ -447,36 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic) { u32 icr_low = apic_get_reg(apic, APIC_ICR); u32 icr_high = apic_get_reg(apic, APIC_ICR2); + struct kvm_lapic_irq irq; - unsigned int dest = GET_APIC_DEST_FIELD(icr_high); - unsigned int short_hand = icr_low & APIC_SHORT_MASK; - unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; - unsigned int level = icr_low & APIC_INT_ASSERT; - unsigned int dest_mode = icr_low & APIC_DEST_MASK; - unsigned int delivery_mode = icr_low & APIC_MODE_MASK; - unsigned int vector = icr_low & APIC_VECTOR_MASK; - - DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); - int i; + irq.vector = icr_low & APIC_VECTOR_MASK; + irq.delivery_mode = icr_low & APIC_MODE_MASK; + irq.dest_mode = icr_low & APIC_DEST_MASK; + irq.level = icr_low & APIC_INT_ASSERT; + irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; + irq.shorthand = icr_low & APIC_SHORT_MASK; + irq.dest_id = GET_APIC_DEST_FIELD(icr_high); apic_debug("icr_high 0x%x, icr_low 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", - icr_high, icr_low, short_hand, dest, - trig_mode, level, dest_mode, delivery_mode, vector); - - kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode, - delivery_mode == APIC_DM_LOWEST, short_hand, - deliver_bitmask); - - while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS)) - < KVM_MAX_VCPUS) { - struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i]; - __clear_bit(i, deliver_bitmask); - if (vcpu) - __apic_accept_irq(vcpu->arch.apic, delivery_mode, - vector, level, trig_mode); - } + icr_high, icr_low, irq.shorthand, irq.dest, + irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, + irq.vector); + + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); } static u32 apic_get_tmcct(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index b66dc14a969..a587f8349c4 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -31,14 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); bool kvm_apic_present(struct kvm_vcpu *vcpu); -bool kvm_lapic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b84aca3c4ad..fb46efbeabe 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -57,4 +57,14 @@ union kvm_ioapic_redirect_entry { } fields; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 43969bbf127..1eddae94bab 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -142,58 +142,33 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } } -int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e) -{ - DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS); - int i, r = -1; - - kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id, - e->fields.dest_mode, - e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY, - 0, deliver_bitmask); - - if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) { - ioapic_debug("no target on destination\n"); - return r; - } - - while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS)) - < KVM_MAX_VCPUS) { - struct kvm_vcpu *vcpu = kvm->vcpus[i]; - __clear_bit(i, deliver_bitmask); - if (vcpu) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, e->fields.vector, - e->fields.delivery_mode, - e->fields.trig_mode); - } else - ioapic_debug("null destination vcpu: " - "mask=%x vector=%x delivery_mode=%x\n", - e->fields.deliver_bitmask, - e->fields.vector, e->fields.delivery_mode); - } - return r; -} - static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) { - union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq]; + union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; + struct kvm_lapic_irq irqe; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", - entry.fields.dest, entry.fields.dest_mode, - entry.fields.delivery_mode, entry.fields.vector, - entry.fields.trig_mode); + entry->fields.dest, entry->fields.dest_mode, + entry->fields.delivery_mode, entry->fields.vector, + entry->fields.trig_mode); + + irqe.dest_id = entry->fields.dest_id; + irqe.vector = entry->fields.vector; + irqe.dest_mode = entry->fields.dest_mode; + irqe.trig_mode = entry->fields.trig_mode; + irqe.delivery_mode = entry->fields.delivery_mode << 8; + irqe.level = 1; + irqe.shorthand = 0; #ifdef CONFIG_X86 /* Always delivery PIT interrupt to vcpu 0 */ if (irq == 0) { - entry.fields.dest_mode = 0; /* Physical mode. */ - entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; + irqe.dest_mode = 0; /* Physical mode. */ + irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; } #endif - return ioapic_deliver_entry(ioapic->kvm, &entry); + return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index e7bc92d895f..7080b713c16 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -71,8 +71,6 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src, - int dest_id, int dest_mode, bool low_prio, int short_hand, - unsigned long *deliver_bitmask); -int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index f5e059b67cd..4fa1f604b42 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -22,6 +22,9 @@ #include #include +#ifdef CONFIG_IA64 +#include +#endif #include "irq.h" @@ -43,61 +46,71 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src, - int dest_id, int dest_mode, bool low_prio, int short_hand, - unsigned long *deliver_bitmask) +inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { - int i, lowest = -1; - struct kvm_vcpu *vcpu; +#ifdef CONFIG_IA64 + return irq->delivery_mode == + (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); +#else + return irq->delivery_mode == APIC_DM_LOWEST; +#endif +} - if (dest_mode == 0 && dest_id == 0xff && low_prio) +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq) +{ + int i, r = -1; + struct kvm_vcpu *vcpu, *lowest = NULL; + + if (irq->dest_mode == 0 && irq->dest_id == 0xff && + kvm_is_dm_lowest_prio(irq)) printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); - bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS); for (i = 0; i < KVM_MAX_VCPUS; i++) { vcpu = kvm->vcpus[i]; if (!vcpu || !kvm_apic_present(vcpu)) continue; - if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id, - dest_mode)) + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, + irq->dest_id, irq->dest_mode)) continue; - if (!low_prio) { - __set_bit(i, deliver_bitmask); + if (!kvm_is_dm_lowest_prio(irq)) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, irq); } else { - if (lowest < 0) - lowest = i; - if (kvm_apic_compare_prio(vcpu, kvm->vcpus[lowest]) < 0) - lowest = i; + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; } } - if (lowest != -1) - __set_bit(lowest, deliver_bitmask); + if (lowest) + r = kvm_apic_set_irq(lowest, irq); + + return r; } static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level) { - union kvm_ioapic_redirect_entry entry; + struct kvm_lapic_irq irq; - entry.bits = 0; - entry.fields.dest_id = (e->msi.address_lo & + irq.dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; - entry.fields.vector = (e->msi.data & + irq.vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&e->msi.address_lo); - entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&e->msi.data); - entry.fields.delivery_mode = test_bit( - MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&e->msi.data); + irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; + irq.delivery_mode = e->msi.data & 0x700; + irq.level = 1; + irq.shorthand = 0; /* TODO Deal with RH bit of MSI message address */ - return ioapic_deliver_entry(kvm, &entry); + return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } /* This should be called with the kvm->lock mutex held -- cgit v1.2.3-70-g09d2 From e56d532f20c890a06bbe7cd479f4201e3a03cd73 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 12 Mar 2009 21:45:39 +0800 Subject: KVM: Device assignment framework rework After discussion with Marcelo, we decided to rework device assignment framework together. The old problems are kernel logic is unnecessary complex. So Marcelo suggest to split it into a more elegant way: 1. Split host IRQ assign and guest IRQ assign. And userspace determine the combination. Also discard msi2intx parameter, userspace can specific KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to enable MSI to INTx convertion. 2. Split assign IRQ and deassign IRQ. Import two new ioctls: KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ. This patch also fixed the reversed _IOR vs _IOW in definition(by deprecated the old interface). [avi: replace homemade bitcount() by hweight_long()] Signed-off-by: Marcelo Tosatti Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 26 ++- include/linux/kvm_host.h | 5 - virt/kvm/kvm_main.c | 486 +++++++++++++++++++++++++---------------------- 4 files changed, 276 insertions(+), 242 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43e049a2ccf..41123fc8613 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_MMU: case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: + case KVM_CAP_ASSIGN_DEV_IRQ: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 640835ed270..644e3a9f47d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -412,6 +412,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_MSIX #define KVM_CAP_DEVICE_MSIX 28 #endif +#define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -485,8 +486,10 @@ struct kvm_irq_routing { #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) @@ -494,6 +497,7 @@ struct kvm_irq_routing { _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -584,6 +588,8 @@ struct kvm_debug_guest { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + struct kvm_assigned_pci_dev { __u32 assigned_dev_id; __u32 busnr; @@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev { }; }; +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + struct kvm_assigned_irq { __u32 assigned_dev_id; __u32 host_irq; @@ -609,15 +626,6 @@ struct kvm_assigned_irq { }; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) - -#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) - -#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\ - KVM_DEV_IRQ_ASSIGN_MASK_MSIX) -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1) -#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2) struct kvm_assigned_msix_nr { __u32 assigned_dev_id; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb60f31c4fb..40e49ede8f9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel { struct msix_entry *host_msix_entries; int guest_irq; struct kvm_guest_msix_entry *guest_msix_entries; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) -#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10)) unsigned long irq_requested_type; int irq_source_id; int flags; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3bed82754a5..792fb7fae0a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -60,9 +61,6 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int msi2intx = 1; -module_param(msi2intx, bool, 0); - DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); @@ -132,7 +130,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { struct kvm_guest_msix_entry *guest_entries = assigned_dev->guest_msix_entries; for (i = 0; i < assigned_dev->entries_nr; i++) { @@ -152,7 +150,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, assigned_dev->guest_irq, 1); if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { + KVM_DEV_IRQ_GUEST_MSI) { enable_irq(assigned_dev->host_irq); assigned_dev->host_irq_disabled = false; } @@ -166,7 +164,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; - if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) { + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { int index = find_index_from_host_irq(assigned_dev, irq); if (index < 0) return IRQ_HANDLED; @@ -204,22 +202,22 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) } } -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) { - if (!irqchip_in_kernel(kvm)) - return; - kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); + assigned_dev->ack_notifier.gsi = -1; if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} - if (!assigned_dev->irq_requested_type) - return; - +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ /* * In kvm_free_device_irq, cancel_work_sync return true if: * 1. work is scheduled, and then cancelled. @@ -236,7 +234,7 @@ static void kvm_free_assigned_irq(struct kvm *kvm, * now, the kvm state is still legal for probably we also have to wait * interrupt_work done. */ - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { int i; for (i = 0; i < assigned_dev->entries_nr; i++) disable_irq_nosync(assigned_dev-> @@ -259,14 +257,41 @@ static void kvm_free_assigned_irq(struct kvm *kvm, free_irq(assigned_dev->host_irq, (void *)assigned_dev); - if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_HOST_MSI) + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) pci_disable_msi(assigned_dev->dev); } - assigned_dev->irq_requested_type = 0; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); +} + +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; } +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} static void kvm_free_assigned_device(struct kvm *kvm, struct kvm_assigned_dev_kernel @@ -298,256 +323,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) } } -static int assigned_device_update_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) { - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; - - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) - return 0; - - if (irqchip_in_kernel(kvm)) { - if (!msi2intx && - (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { - free_irq(adev->host_irq, (void *)adev); - pci_disable_msi(adev->dev); - } + dev->host_irq = dev->dev->irq; + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)dev)) + return -EIO; + return 0; +} - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; - if (airq->host_irq) - adev->host_irq = airq->host_irq; - else - adev->host_irq = adev->dev->irq; + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)adev)) - return -EIO; + dev->host_irq = dev->dev->irq; + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)dev)) { + pci_disable_msi(dev->dev); + return -EIO; } - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | - KVM_ASSIGNED_DEV_HOST_INTX; return 0; } +#endif -#ifdef CONFIG_X86 -static int assigned_device_update_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) { - int r; + int i, r = -EINVAL; - adev->guest_irq = airq->guest_irq; - if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { - /* x86 don't care upper address of guest msi message addr */ - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->ack_notifier.gsi = -1; - } else if (msi2intx) { - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->ack_notifier.gsi = airq->guest_irq; - } else { - /* - * Guest require to disable device MSI, we disable MSI and - * re-enable INTx by default again. Notice it's only for - * non-msi2intx. - */ - assigned_device_update_intx(kvm, adev, airq); - return 0; - } + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - return 0; + r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + if (r) + return r; - if (irqchip_in_kernel(kvm)) { - if (!msi2intx) { - if (adev->irq_requested_type & - KVM_ASSIGNED_DEV_HOST_INTX) - free_irq(adev->host_irq, (void *)adev); + for (i = 0; i < dev->entries_nr; i++) { + r = request_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)dev); + /* FIXME: free requested_irq's on failure */ + if (r) + return r; + } - r = pci_enable_msi(adev->dev); - if (r) - return r; - } + return 0; +} - adev->host_irq = adev->dev->irq; - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)adev)) - return -EIO; - } +#endif - if (!msi2intx) - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} - adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; return 0; } #endif +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif #ifdef __KVM_HAVE_MSIX -static int assigned_device_update_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) -{ - /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ - int i, r; - - adev->ack_notifier.gsi = -1; - - if (irqchip_in_kernel(kvm)) { - if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) - return -ENOTTY; - - if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) { - /* Guest disable MSI-X */ - kvm_free_assigned_irq(kvm, adev); - if (msi2intx) { - pci_enable_msi(adev->dev); - if (adev->dev->msi_enabled) - return assigned_device_update_msi(kvm, - adev, airq); - } - return assigned_device_update_intx(kvm, adev, airq); - } + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; + } - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (adev->entries_nr == 0) - return -EINVAL; + if (!r) + dev->irq_requested_type |= host_irq_type; - kvm_free_assigned_irq(kvm, adev); + return r; +} - r = pci_enable_msix(adev->dev, adev->host_msix_entries, - adev->entries_nr); - if (r) - return r; +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; - for (i = 0; i < adev->entries_nr; i++) { - r = request_irq((adev->host_msix_entries + i)->vector, - kvm_assigned_dev_intr, 0, - "kvm_assigned_msix_device", - (void *)adev); - if (r) - return r; - } + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; + + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; } - adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX; + if (!r) { + dev->irq_requested_type |= guest_irq_type; + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else + kvm_free_irq_source_id(kvm, dev->irq_source_id); - return 0; + return r; } -#endif +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) + struct kvm_assigned_irq *assigned_irq) { - int r = 0; + int r = -EINVAL; struct kvm_assigned_dev_kernel *match; - u32 current_flags = 0, changed_flags; + unsigned long host_irq_type, guest_irq_type; - mutex_lock(&kvm->lock); + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_irq->assigned_dev_id); - if (!match) { - mutex_unlock(&kvm->lock); - return -EINVAL; - } - - if (!match->irq_requested_type) { - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - if (irqchip_in_kernel(kvm)) { - /* Register ack nofitier */ - match->ack_notifier.gsi = -1; - match->ack_notifier.irq_acked = - kvm_assigned_dev_ack_irq; - kvm_register_irq_ack_notifier(kvm, - &match->ack_notifier); - - /* Request IRQ source ID */ - r = kvm_request_irq_source_id(kvm); - if (r < 0) - goto out_release; - else - match->irq_source_id = r; - -#ifdef CONFIG_X86 - /* Determine host device irq type, we can know the - * result from dev->msi_enabled */ - if (msi2intx) - pci_enable_msi(match->dev); -#endif - } - } + if (!match) + goto out; - if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) - current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX; - else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && - (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) - current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - changed_flags = assigned_irq->flags ^ current_flags; + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; -#ifdef __KVM_HAVE_MSIX - if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) { - r = assigned_device_update_msix(kvm, match, assigned_irq); - if (r) { - printk(KERN_WARNING "kvm: failed to execute " - "MSI-X action!\n"); - goto out_release; - } - } else -#endif - if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || - (msi2intx && match->dev->msi_enabled)) { -#ifdef CONFIG_X86 - r = assigned_device_update_msi(kvm, match, assigned_irq); - if (r) { - printk(KERN_WARNING "kvm: failed to enable " - "MSI device!\n"); - goto out_release; - } -#else - r = -ENOTTY; -#endif - } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { - /* Host device IRQ 0 means don't support INTx */ - if (!msi2intx) { - printk(KERN_WARNING - "kvm: wait device to enable MSI!\n"); - r = 0; - } else { - printk(KERN_WARNING - "kvm: failed to enable MSI device!\n"); - r = -ENOTTY; - goto out_release; - } - } else { - /* Non-sharing INTx mode */ - r = assigned_device_update_intx(kvm, match, assigned_irq); - if (r) { - printk(KERN_WARNING "kvm: failed to enable " - "INTx device!\n"); - goto out_release; - } - } + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: mutex_unlock(&kvm->lock); return r; -out_release: +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + r = kvm_deassign_irq(kvm, match, assigned_irq->flags); +out: mutex_unlock(&kvm->lock); - kvm_free_assigned_device(kvm, match); return r; } @@ -565,7 +578,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, assigned_dev->assigned_dev_id); if (match) { /* device already assigned */ - r = -EINVAL; + r = -EEXIST; goto out; } @@ -604,6 +617,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->dev = dev; match->irq_source_id = -1; match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); list_add(&match->list, &kvm->arch.assigned_dev_head); @@ -2084,6 +2100,11 @@ static long kvm_vm_ioctl(struct file *filp, break; } case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + case KVM_ASSIGN_DEV_IRQ: { struct kvm_assigned_irq assigned_irq; r = -EFAULT; @@ -2094,6 +2115,18 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif #endif #ifdef KVM_CAP_DEVICE_DEASSIGNMENT case KVM_DEASSIGN_PCI_DEVICE: { @@ -2596,9 +2629,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; -#ifndef CONFIG_X86 - msi2intx = 0; -#endif return 0; -- cgit v1.2.3-70-g09d2 From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 23 Mar 2009 12:12:11 +0200 Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking if interrupt window is actually opened. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 6 ++++++ arch/powerpc/kvm/powerpc.c | 6 ++++++ arch/s390/kvm/interrupt.c | 6 ++++++ arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 8 +++++++- arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 3 ++- 9 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d2a90fd505b..3bf0a345224 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1963,6 +1963,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) return 0; } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return vcpu->arch.timer_fired; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9057335fdc6..2cf915e51e7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions); } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.msr & MSR_WE); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0189356fe20..4ed4c3a1148 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -318,6 +318,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) return rc; } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 46276273a1a..8351c4d00ac 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -521,7 +521,7 @@ struct kvm_x86_ops { void (*inject_pending_irq)(struct kvm_vcpu *vcpu); void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, struct kvm_run *run); - + int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); int (*get_mt_mask_shift)(void); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index aa528dbad07..de741043c5b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2270,6 +2270,15 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; } +static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; + return (vmcb->save.rflags & X86_EFLAGS_IF) && + !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && + (svm->vcpu.arch.hflags & HF_GIF_MASK); +} + static void svm_intr_assist(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2649,6 +2658,7 @@ static struct kvm_x86_ops svm_x86_ops = { .exception_injected = svm_exception_injected, .inject_pending_irq = svm_intr_assist, .inject_pending_vectors = do_interrupt_requests, + .interrupt_allowed = svm_interrupt_allowed, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da6461d5dc8..b9e06b07aca 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2490,6 +2490,12 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_MOV_SS))); } +static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + vmx_update_window_states(vcpu); + return vcpu->arch.interrupt_window_open; +} + static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { @@ -3691,7 +3697,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .exception_injected = vmx_exception_injected, .inject_pending_irq = vmx_intr_assist, .inject_pending_vectors = do_interrupt_requests, - + .interrupt_allowed = vmx_interrupt_allowed, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, .get_mt_mask_shift = vmx_get_mt_mask_shift, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8fca7a4e95a..5bbcad34537 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4475,3 +4475,8 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); put_cpu(); } + +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops->interrupt_allowed(vcpu); +} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 40e49ede8f9..72d56844f38 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a1a4272fa57..63d5fa2bc84 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1610,7 +1610,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - if (kvm_cpu_has_interrupt(vcpu) || + if ((kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_has_interrupt(vcpu)) || kvm_arch_vcpu_runnable(vcpu)) { set_bit(KVM_REQ_UNHALT, &vcpu->requests); break; -- cgit v1.2.3-70-g09d2 From 2f8b9ee14eb439008e0c5131116ea6baa40dba50 Mon Sep 17 00:00:00 2001 From: nathan binkert Date: Fri, 27 Mar 2009 21:53:05 -0700 Subject: KVM: Make kvm header C++ friendly Two things needed fixing: 1) g++ does not allow a named structure type within an anonymous union and 2) Avoid name clash between two padding fields within the same struct by giving them different names as is done elsewhere in the header. Signed-off-by: Nathan Binkert Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 644e3a9f47d..3db5d8d3748 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -119,7 +119,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct kvm_io { + struct { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; @@ -224,10 +224,10 @@ struct kvm_interrupt { /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; - __u32 padding; + __u32 padding1; union { void __user *dirty_bitmap; /* one bit per page */ - __u64 padding; + __u64 padding2; }; }; -- cgit v1.2.3-70-g09d2 From 522c68c4416de3cd3e11a9ff10d58e776a69ae1e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 27 Apr 2009 20:35:43 +0800 Subject: KVM: Enable snooping control for supported hardware Memory aliases with different memory type is a problem for guest. For the guest without assigned device, the memory type of guest memory would always been the same as host(WB); but for the assigned device, some part of memory may be used as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of host memory type then be a potential issue. Snooping control can guarantee the cache correctness of memory go through the DMA engine of VT-d. [avi: fix build on ia64] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 19 +++++++++++++++++-- include/linux/kvm_host.h | 3 +++ virt/kvm/iommu.c | 27 ++++++++++++++++++++++++--- 5 files changed, 46 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 589536fa799..5f43697aed3 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -474,6 +474,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; + int iommu_flags; struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8a6f6b643df..253d8f669cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_arch{ struct list_head active_mmu_pages; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; + int iommu_flags; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 59b080c262e..e8a5649f9c1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3581,11 +3581,26 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { u64 ret; + /* For VT-d and EPT combination + * 1. MMIO: always map as UC + * 2. EPT with VT-d: + * a. VT-d without snooping control feature: can't guarantee the + * result, try to trust guest. + * b. VT-d with snooping control feature: snooping control feature of + * VT-d engine can guarantee the cache correctness. Just set it + * to WB to keep consistent with host. So the same as item 3. + * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep + * consistent with host MTRR + */ if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; + else if (vcpu->kvm->arch.iommu_domain && + !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + ret = kvm_get_guest_memory_type(vcpu, gfn) << + VMX_EPT_MT_EPTE_SHIFT; else - ret = (kvm_get_guest_memory_type(vcpu, gfn) << - VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT; + ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) + | VMX_EPT_IGMT_BIT; return ret; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 72d56844f38..bdce8e1303c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -367,6 +367,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); +/* For vcpu->arch.iommu_flags */ +#define KVM_IOMMU_CACHE_COHERENCY 0x1 + #ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 4c403750360..15147583abd 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm, pfn_t pfn; int i, r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; + int flags; /* check if iommu exists and in use */ if (!domain) return 0; + flags = IOMMU_READ | IOMMU_WRITE; + if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + flags |= IOMMU_CACHE; + for (i = 0; i < npages; i++) { /* check if already mapped */ if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) @@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - PAGE_SIZE, - IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%lx\n", pfn); @@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r; + int r, last_flags; /* check if iommu exists and in use */ if (!domain) @@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm, return r; } + last_flags = kvm->arch.iommu_flags; + if (iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY)) + kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + + /* Check if need to update IOMMU page table for guest memory */ + if ((last_flags ^ kvm->arch.iommu_flags) == + KVM_IOMMU_CACHE_COHERENCY) { + kvm_iommu_unmap_memslots(kvm); + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + } + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); return 0; +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; } int kvm_deassign_device(struct kvm *kvm, -- cgit v1.2.3-70-g09d2 From 32f8840064d88cc3f6e85203aec7b6b57bebcb97 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 May 2009 17:55:12 -0300 Subject: KVM: use smp_send_reschedule in kvm_vcpu_kick KVM uses a function call IPI to cause the exit of a guest running on a physical cpu. For virtual interrupt notification there is no need to wait on IPI receival, or to execute any function. This is exactly what the reschedule IPI does, without the overhead of function IPI. So use it instead of smp_call_function_single in kvm_vcpu_kick. Also change the "guest_mode" variable to a bit in vcpu->requests, and use that to collapse multiple IPI's that would be issued between the first one and zeroing of guest mode. This allows kvm_vcpu_kick to called with interrupts disabled. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kernel/irq_ia64.c | 3 +++ arch/ia64/kvm/kvm-ia64.c | 22 ++++++++-------------- arch/x86/kernel/smp.c | 3 +++ arch/x86/kvm/x86.c | 36 +++++++++++------------------------- include/linux/kvm_host.h | 2 +- 5 files changed, 26 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index acc4d19ae62..b448197728b 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = { .name = "IPI" }; +/* + * KVM uses this interrupt to force a cpu out of guest mode + */ static struct irqaction resched_irqaction = { .handler = dummy_handler, .flags = IRQF_DISABLED, diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index be4413e1f43..80c57b0a21c 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -668,7 +668,7 @@ again: host_ctx = kvm_get_host_context(vcpu); guest_ctx = kvm_get_guest_context(vcpu); - vcpu->guest_mode = 1; + clear_bit(KVM_REQ_KICK, &vcpu->requests); r = kvm_vcpu_pre_transition(vcpu); if (r < 0) @@ -685,7 +685,7 @@ again: kvm_vcpu_post_transition(vcpu); vcpu->arch.launched = 1; - vcpu->guest_mode = 0; + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); /* @@ -1879,24 +1879,18 @@ void kvm_arch_hardware_unsetup(void) { } -static void vcpu_kick_intr(void *info) -{ -#ifdef DEBUG - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; - printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); -#endif -} - void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int ipi_pcpu = vcpu->cpu; - int cpu = get_cpu(); + int me; + int cpu = vcpu->cpu; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - if (vcpu->guest_mode && cpu != ipi_pcpu) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); + me = get_cpu(); + if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) + if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) + smp_send_reschedule(cpu); put_cpu(); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 13f33ea8cca..3b2e55e8ad2 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + /* + * KVM uses this interrupt to force a cpu out of guest mode + */ } void smp_call_function_interrupt(struct pt_regs *regs) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75927700a26..3c4c327490a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3230,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) local_irq_disable(); + clear_bit(KVM_REQ_KICK, &vcpu->requests); + smp_mb__after_clear_bit(); + if (vcpu->requests || need_resched() || signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -3237,13 +3240,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - vcpu->guest_mode = 1; - /* - * Make sure that guest_mode assignment won't happen after - * testing the pending IRQ vector bitmap. - */ - smp_wmb(); - if (vcpu->arch.exception.pending) __queue_exception(vcpu); else @@ -3288,7 +3284,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_debugreg(vcpu->arch.host_dr6, 6); set_debugreg(vcpu->arch.host_dr7, 7); - vcpu->guest_mode = 0; + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); ++vcpu->stat.exits; @@ -4571,30 +4567,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) || vcpu->arch.nmi_pending; } -static void vcpu_kick_intr(void *info) -{ -#ifdef DEBUG - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; - printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); -#endif -} - void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int ipi_pcpu = vcpu->cpu; - int cpu; + int me; + int cpu = vcpu->cpu; if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); ++vcpu->stat.halt_wakeup; } - /* - * We may be called synchronously with irqs disabled in guest mode, - * So need not to call smp_call_function_single() in that case. - */ - cpu = get_cpu(); - if (vcpu->guest_mode && vcpu->cpu != cpu) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); + + me = get_cpu(); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) + smp_send_reschedule(cpu); put_cpu(); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bdce8e1303c..16181628419 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_KVMCLOCK_UPDATE 8 +#define KVM_REQ_KICK 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 @@ -72,7 +73,6 @@ struct kvm_vcpu { struct mutex mutex; int cpu; struct kvm_run *run; - int guest_mode; unsigned long requests; unsigned long guest_debug; int fpu_active; -- cgit v1.2.3-70-g09d2 From 547de29e5b1662deb05b5f90917902dc0e9ac182 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 May 2009 17:55:13 -0300 Subject: KVM: protect assigned dev workqueue, int handler and irq acker kvm_assigned_dev_ack_irq is vulnerable to a race condition with the interrupt handler function. It does: if (dev->host_irq_disabled) { enable_irq(dev->host_irq); dev->host_irq_disabled = false; } If an interrupt triggers before the host->dev_irq_disabled assignment, it will disable the interrupt and set dev->host_irq_disabled to true. On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to false, and the next kvm_assigned_dev_ack_irq call will fail to reenable it. Other than that, having the interrupt handler and work handlers run in parallel sounds like asking for trouble (could not spot any obvious problem, but better not have to, its fragile). CC: sheng.yang@intel.com Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 16181628419..aacc5449f58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,6 +345,7 @@ struct kvm_assigned_dev_kernel { int flags; struct pci_dev *dev; struct kvm *kvm; + spinlock_t assigned_dev_lock; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 29c0afb064d..687d113a3e5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&kvm->lock); + spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { struct kvm_guest_msix_entry *guest_entries = assigned_dev->guest_msix_entries; @@ -156,18 +158,21 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) } } + spin_unlock_irq(&assigned_dev->assigned_dev_lock); mutex_unlock(&assigned_dev->kvm->lock); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) { + unsigned long flags; struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { int index = find_index_from_host_irq(assigned_dev, irq); if (index < 0) - return IRQ_HANDLED; + goto out; assigned_dev->guest_msix_entries[index].flags |= KVM_ASSIGNED_MSIX_PENDING; } @@ -177,6 +182,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) disable_irq_nosync(irq); assigned_dev->host_irq_disabled = true; +out: + spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); return IRQ_HANDLED; } @@ -184,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_assigned_dev_kernel *dev; + unsigned long flags; if (kian->gsi == -1) return; @@ -196,10 +204,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) /* The guest irq may be shared so this ack may be * from another device. */ + spin_lock_irqsave(&dev->assigned_dev_lock, flags); if (dev->host_irq_disabled) { enable_irq(dev->host_irq); dev->host_irq_disabled = false; } + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); } static void deassign_guest_irq(struct kvm *kvm, @@ -615,6 +625,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; match->dev = dev; + spin_lock_init(&match->assigned_dev_lock); match->irq_source_id = -1; match->kvm = kvm; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; -- cgit v1.2.3-70-g09d2 From 04dce7d9d429ea5ea04e9432d1726c930f4d67da Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jun 2009 16:59:46 +1000 Subject: spinlock: Add missing __raw_spin_lock_flags() stub for UP This was only defined with CONFIG_DEBUG_SPINLOCK set, but some obscure arch/powerpc code wants it always. Signed-off-by: Benjamin Herrenschmidt Acked-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/spinlock_up.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 938234c4a99..d4841ed8215 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) #define __raw_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ # define __raw_spin_lock(lock) do { (void)(lock); } while (0) +# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) # define __raw_spin_unlock(lock) do { (void)(lock); } while (0) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ -- cgit v1.2.3-70-g09d2 From f1db457ce6e2f63cb01022f58c0c023838958bd1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 10 Jun 2009 10:06:24 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT(), fix !CONFIG_BLOCK Fix building failures when CONFIG_BLOCK == n. Signed-off-by: Li Zefan LKML-Reference: <4A2F1520.8020003@cn.fujitsu.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index c7ec31dd04c..7e4350ece0f 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -218,7 +218,7 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ -#ifdef CONFIG_EVENT_TRACING +#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) static inline int blk_cmd_buf_len(struct request *rq) { @@ -229,7 +229,7 @@ extern void blk_dump_cmd(char *buf, struct request *rq); extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); -#endif /* CONFIG_EVENT_TRACING */ +#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ #endif /* __KERNEL__ */ #endif -- cgit v1.2.3-70-g09d2 From 440f0d588555892601cfe511728a0fc0c8204063 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 10 Jun 2009 14:32:47 +0200 Subject: netfilter: nf_conntrack: use per-conntrack locks for protocol data Introduce per-conntrack locks and use them instead of the global protocol locks to avoid contention. Especially tcp_lock shows up very high in profiles on larger machines. This will also allow to simplify the upcoming reliable event delivery patches. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 2 ++ include/net/netfilter/nf_conntrack_l4proto.h | 4 +-- net/netfilter/nf_conntrack_core.c | 1 + net/netfilter/nf_conntrack_netlink.c | 4 +-- net/netfilter/nf_conntrack_proto_dccp.c | 24 ++++++++--------- net/netfilter/nf_conntrack_proto_gre.c | 3 +-- net/netfilter/nf_conntrack_proto_sctp.c | 27 +++++++++---------- net/netfilter/nf_conntrack_proto_tcp.c | 39 +++++++++++++--------------- 8 files changed, 49 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2b877374242..ecc79f95907 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -93,6 +93,8 @@ struct nf_conn { plus 1 for any connection(s) we are `master' for */ struct nf_conntrack ct_general; + spinlock_t lock; + /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ba32ed7bdab..3767fb41e54 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -59,11 +59,11 @@ struct nf_conntrack_l4proto const struct nf_conntrack_tuple *); /* Print out the private part of the conntrack. */ - int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + int (*print_conntrack)(struct seq_file *s, struct nf_conn *); /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct); + struct nf_conn *ct); /* Calculate protoinfo nlattr size */ int (*nlattr_size)(void); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b54c23475e9..edf95695e0a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -519,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, return ERR_PTR(-ENOMEM); } + spin_lock_init(&ct->lock); atomic_set(&ct->ct_general.use, 1); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4448b062de0..4e503ada572 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -143,7 +143,7 @@ nla_put_failure: } static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) +ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -347,7 +347,7 @@ nla_put_failure: static int ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, const struct nf_conn *ct) + int event, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 11801c43c8c..6b08d327796 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -24,8 +24,6 @@ #include #include -static DEFINE_RWLOCK(dccp_lock); - /* Timeouts are based on values from RFC4340: * * - REQUEST: @@ -491,7 +489,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; @@ -535,13 +533,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); @@ -551,7 +549,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); @@ -617,18 +615,18 @@ static int dccp_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.dccp.port)); } -static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -638,11 +636,11 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, cpu_to_be64(ct->proto.dccp.handshake_seq)); nla_nest_end(skb, nest_parms); - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return 0; nla_put_failure: - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -673,7 +671,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return -EINVAL; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; @@ -686,7 +684,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.dccp.handshake_seq = be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); } - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 117b80112fc..175a28c9616 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s, } /* print private data for conntrack */ -static int gre_print_conntrack(struct seq_file *s, - const struct nf_conn *ct) +static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "timeout=%u, stream_timeout=%u ", (ct->proto.gre.timeout / HZ), diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 101b4ad9e81..c10e6f36e31 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,9 +25,6 @@ #include #include -/* Protects ct->proto.sctp */ -static DEFINE_RWLOCK(sctp_lock); - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR @@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum sctp_conntrack state; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.sctp.state; - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", sctp_conntrack_names[state]); } @@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct, } old_state = new_state = SCTP_CONNTRACK_NONE; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { @@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct, if (old_state != new_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); @@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct, return NF_ACCEPT; out_unlock: - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); out: return -NF_ACCEPT; } @@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, #include static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, CTA_PROTOINFO_SCTP_VTAG_REPLY, ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) return -EINVAL; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b7e8a825efe..5c5739c741f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -29,9 +29,6 @@ #include #include -/* Protects ct->proto.tcp */ -static DEFINE_RWLOCK(tcp_lock); - /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ @@ -309,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum tcp_conntrack state; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.tcp.state; - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); } @@ -725,14 +722,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); /* * We have to worry for the ack in the reply packet only... */ if (after(end, ct->proto.tcp.seen[dir].td_end)) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -841,7 +838,7 @@ static int tcp_packet(struct nf_conn *ct, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); @@ -871,7 +868,7 @@ static int tcp_packet(struct nf_conn *ct, && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process @@ -907,7 +904,7 @@ static int tcp_packet(struct nf_conn *ct, * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); @@ -920,7 +917,7 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); @@ -929,7 +926,7 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); @@ -960,7 +957,7 @@ static int tcp_packet(struct nf_conn *ct, if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: @@ -989,7 +986,7 @@ static int tcp_packet(struct nf_conn *ct, timeout = nf_ct_tcp_timeout_unacknowledged; else timeout = tcp_timeouts[new_state]; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); @@ -1106,12 +1103,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, #include static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -1131,14 +1128,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, tmp.flags = ct->proto.tcp.seen[1].flags; NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp); - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -1169,7 +1166,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) return -EINVAL; - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); @@ -1196,7 +1193,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.tcp.seen[1].td_scale = nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return 0; } -- cgit v1.2.3-70-g09d2 From feb6118d4e76bc5685909426c2dcc90fdb7ba05b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:00:39 +0300 Subject: [SCSI] libosd: OSD2r05: Additional command enums Add all constant definitions of new OSD commands added in revision 4 & 5. Mainly for creating snapshots and clones. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index 62b2ab8c69d..2cc8e8b1cc1 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -303,7 +303,15 @@ enum osd_service_actions { OSD_ACT_V2(REMOVE_MEMBER_OBJECTS, 0x21) OSD_ACT_V2(GET_MEMBER_ATTRIBUTES, 0x22) OSD_ACT_V2(SET_MEMBER_ATTRIBUTES, 0x23) + + OSD_ACT_V2(CREATE_CLONE, 0x28) + OSD_ACT_V2(CREATE_SNAPSHOT, 0x29) + OSD_ACT_V2(DETACH_CLONE, 0x2A) + OSD_ACT_V2(REFRESH_SNAPSHOT_CLONE, 0x2B) + OSD_ACT_V2(RESTORE_PARTITION_FROM_SNAPSHOT, 0x2C) + OSD_ACT_V2(READ_MAP, 0x31) + OSD_ACT_V2(READ_MAPS_COMPARE, 0x32) OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND, 0x8F7E, 0x8F7C) OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT, 0x8F7F, 0x8F7D) -- cgit v1.2.3-70-g09d2 From 29191b92030bc97b05b539ce5ae0543c24a6d7ca Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:01:03 +0300 Subject: [SCSI] libosd: OSD2r05: Attribute definitions Some New revision 5 Attribute definitions. Some missing definitions of Attributes and pages Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- include/scsi/osd_attributes.h | 74 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h index f888a6fda07..56e920ade32 100644 --- a/include/scsi/osd_attributes.h +++ b/include/scsi/osd_attributes.h @@ -29,6 +29,7 @@ enum { OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1, OSD_APAGE_PARTITION_QUOTAS = OSD_APAGE_PARTITION_FIRST + 2, OSD_APAGE_PARTITION_TIMESTAMP = OSD_APAGE_PARTITION_FIRST + 3, + OSD_APAGE_PARTITION_ATTR_ACCESS = OSD_APAGE_PARTITION_FIRST + 4, OSD_APAGE_PARTITION_SECURITY = OSD_APAGE_PARTITION_FIRST + 5, OSD_APAGE_PARTITION_LAST = 0x5FFFFFFF, @@ -51,7 +52,9 @@ enum { OSD_APAGE_RESERVED_TYPE_LAST = 0xEFFFFFFF, OSD_APAGE_COMMON_FIRST = 0xF0000000, - OSD_APAGE_COMMON_LAST = 0xFFFFFFFE, + OSD_APAGE_COMMON_LAST = 0xFFFFFFFD, + + OSD_APAGE_CURRENT_COMMAND = 0xFFFFFFFE, OSD_APAGE_REQUEST_ALL = 0xFFFFFFFF, }; @@ -106,10 +109,30 @@ enum { OSD_ATTR_RI_PRODUCT_REVISION_LEVEL = 0x7, /* 4 */ OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER = 0x8, /* variable */ OSD_ATTR_RI_OSD_NAME = 0x9, /* variable */ + OSD_ATTR_RI_MAX_CDB_CONTINUATION_LEN = 0xA, /* 4 */ OSD_ATTR_RI_TOTAL_CAPACITY = 0x80, /* 8 */ OSD_ATTR_RI_USED_CAPACITY = 0x81, /* 8 */ OSD_ATTR_RI_NUMBER_OF_PARTITIONS = 0xC0, /* 8 */ OSD_ATTR_RI_CLOCK = 0x100, /* 6 */ + OARI_DEFAULT_ISOLATION_METHOD = 0X110, /* 1 */ + OARI_SUPPORTED_ISOLATION_METHODS = 0X111, /* 32 */ + + OARI_DATA_ATOMICITY_GUARANTEE = 0X120, /* 8 */ + OARI_DATA_ATOMICITY_ALIGNMENT = 0X121, /* 8 */ + OARI_ATTRIBUTES_ATOMICITY_GUARANTEE = 0X122, /* 8 */ + OARI_DATA_ATTRIBUTES_ATOMICITY_MULTIPLIER = 0X123, /* 1 */ + + OARI_MAXIMUM_SNAPSHOTS_COUNT = 0X1C1, /* 0 or 4 */ + OARI_MAXIMUM_CLONES_COUNT = 0X1C2, /* 0 or 4 */ + OARI_MAXIMUM_BRANCH_DEPTH = 0X1CC, /* 0 or 4 */ + OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_FIRST = 0X200, /* 0 or 4 */ + OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_LAST = 0X2ff, /* 0 or 4 */ + OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_FIRST = 0X300, /* 0 or 4 */ + OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_LAST = 0X30F, /* 0 or 4 */ + OARI_SUPPORT_FOR_DUPLICATED_OBJECT_FREEZING = 0X310, /* 0 or 4 */ + OARI_SUPPORT_FOR_SNAPSHOT_REFRESHING = 0X311, /* 0 or 1 */ + OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_FIRST = 0X7000001,/* 0 or 4 */ + OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_LAST = 0X700FFFF,/* 0 or 4 */ }; /* Root_Information_attributes_page does not have a get_page structure */ @@ -120,7 +143,15 @@ enum { OSD_ATTR_PI_PARTITION_ID = 0x1, /* 8 */ OSD_ATTR_PI_USERNAME = 0x9, /* variable */ OSD_ATTR_PI_USED_CAPACITY = 0x81, /* 8 */ + OSD_ATTR_PI_USED_CAPACITY_INCREMENT = 0x84, /* 0 or 8 */ OSD_ATTR_PI_NUMBER_OF_OBJECTS = 0xC1, /* 8 */ + + OSD_ATTR_PI_ACTUAL_DATA_SPACE = 0xD1, /* 0 or 8 */ + OSD_ATTR_PI_RESERVED_DATA_SPACE = 0xD2, /* 0 or 8 */ + OSD_ATTR_PI_DEFAULT_SNAPSHOT_DUPLICATION_METHOD = 0x200,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_CLONE_DUPLICATION_METHOD = 0x201,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_SP_TIME_OF_DUPLICATION = 0x300,/* 0 or 4 */ + OSD_ATTR_PI_DEFAULT_CLONE_TIME_OF_DUPLICATION = 0x301,/* 0 or 4 */ }; /* Partition Information attributes page does not have a get_page structure */ @@ -131,6 +162,7 @@ enum { OSD_ATTR_CI_PARTITION_ID = 0x1, /* 8 */ OSD_ATTR_CI_COLLECTION_OBJECT_ID = 0x2, /* 8 */ OSD_ATTR_CI_USERNAME = 0x9, /* variable */ + OSD_ATTR_CI_COLLECTION_TYPE = 0xA, /* 1 */ OSD_ATTR_CI_USED_CAPACITY = 0x81, /* 8 */ }; /* Collection Information attributes page does not have a get_page structure */ @@ -144,6 +176,8 @@ enum { OSD_ATTR_OI_USERNAME = 0x9, /* variable */ OSD_ATTR_OI_USED_CAPACITY = 0x81, /* 8 */ OSD_ATTR_OI_LOGICAL_LENGTH = 0x82, /* 8 */ + SD_ATTR_OI_ACTUAL_DATA_SPACE = 0XD1, /* 0 OR 8 */ + SD_ATTR_OI_RESERVED_DATA_SPACE = 0XD2, /* 0 OR 8 */ }; /* Object Information attributes page does not have a get_page structure */ @@ -248,7 +282,18 @@ struct object_timestamps_attributes_page { struct osd_timestamp data_modified_time; } __packed; -/* 7.1.2.19 Collections attributes page */ +/* OSD2r05: 7.1.3.19 Attributes Access attributes page + * (OSD_APAGE_PARTITION_ATTR_ACCESS) + * + * each attribute is of the form below. Total array length is deduced + * from the attribute's length + * (See allowed_attributes_access of the struct osd_cap_object_descriptor) + */ +struct attributes_access_attr { + struct osd_attributes_list_attrid attr_list[0]; +} __packed; + +/* OSD2r05: 7.1.2.21 Collections attributes page */ /* TBD */ /* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */ @@ -324,4 +369,29 @@ struct object_security_attributes_page { __be32 policy_access_tag; } __packed; +/* OSD2r05: 7.1.3.31 Current Command attributes page + * (OSD_APAGE_CURRENT_COMMAND) + */ +enum { + OSD_ATTR_CC_RESPONSE_INTEGRITY_CHECK_VALUE = 0x1, /* 32 */ + OSD_ATTR_CC_OBJECT_TYPE = 0x2, /* 1 */ + OSD_ATTR_CC_PARTITION_ID = 0x3, /* 8 */ + OSD_ATTR_CC_OBJECT_ID = 0x4, /* 8 */ + OSD_ATTR_CC_STARTING_BYTE_ADDRESS_OF_APPEND = 0x5, /* 8 */ + OSD_ATTR_CC_CHANGE_IN_USED_CAPACITY = 0x6, /* 8 */ +}; + +/*TBD: osdv1_current_command_attributes_page */ + +struct osdv2_current_command_attributes_page { + struct osd_attr_page_header hdr; /* id=0xFFFFFFFE, size=0x44 */ + u8 response_integrity_check_value[OSD_CRYPTO_KEYID_SIZE]; + u8 object_type; + u8 reserved[3]; + __be64 partition_id; + __be64 object_id; + __be64 starting_byte_address_of_append; + __be64 change_in_used_capacity; +}; + #endif /*ndef __OSD_ATTRIBUTES_H__*/ -- cgit v1.2.3-70-g09d2 From 0e35afbc8b054e04a35faa796c72abb3b82bd33b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:02:22 +0300 Subject: [SCSI] libosd: osd_req_{read,write}_kern new API By popular demand, define usefull wrappers for osd_req_read/write that recieve kernel pointers. All users had their own. Also remove these from exofs Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 28 ++++++++++++++++++++++++++++ fs/exofs/common.h | 6 ------ fs/exofs/osd.c | 26 -------------------------- include/scsi/osd_initiator.h | 4 ++++ 4 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 15f0bbc19c9..c98153bfb36 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -789,6 +789,20 @@ void osd_req_write(struct osd_request *or, } EXPORT_SYMBOL(osd_req_write); +int osd_req_write_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) +{ + struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; + struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + osd_req_write(or, obj, bio, offset); + return 0; +} +EXPORT_SYMBOL(osd_req_write_kern); + /*TODO: void osd_req_append(struct osd_request *, const struct osd_obj_id *, struct bio *data_out); */ /*TODO: void osd_req_create_write(struct osd_request *, @@ -824,6 +838,20 @@ void osd_req_read(struct osd_request *or, } EXPORT_SYMBOL(osd_req_read); +int osd_req_read_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) +{ + struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; + struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + osd_req_read(or, obj, bio, offset); + return 0; +} +EXPORT_SYMBOL(osd_req_read_kern); + void osd_req_get_attributes(struct osd_request *or, const struct osd_obj_id *obj) { diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1512c4bb8c..24667eedc02 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or, int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - #endif /*ifndef __EXOFS_COM_H__*/ diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b249ae97fb1..48cc4d11d3f 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) return -EIO; } - -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_read(or, obj, bio, offset); - return 0; -} - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_write(or, obj, bio, offset); - return 0; -} diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index b24d9616eb4..6132790d678 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -364,6 +364,8 @@ void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *); void osd_req_write(struct osd_request *or, const struct osd_obj_id *, struct bio *data_out, u64 offset); +int osd_req_write_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); void osd_req_append(struct osd_request *or, const struct osd_obj_id *, struct bio *data_out);/* NI */ void osd_req_create_write(struct osd_request *or, @@ -379,6 +381,8 @@ void osd_req_flush_object(struct osd_request *or, void osd_req_read(struct osd_request *or, const struct osd_obj_id *, struct bio *data_in, u64 offset); +int osd_req_read_kern(struct osd_request *or, + const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); /* * Root/Partition/Collection/Object Attributes commands -- cgit v1.2.3-70-g09d2 From 62f469b596dd0aadf046a69027087c18db43734e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:04:26 +0300 Subject: [SCSI] libosd: osd_req_{read,write} takes a length parameter For supporting of chained-bios we can not inspect the first bio only, as before. Caller shall pass the total length of the request, ie. sum_bytes(bio-chain). Also since the bio might be a chain we don't set it's direction on behalf of it's callers. The bio direction should be properly set prior to this call. So fix a couple of write users that now need to set the bio direction properly [In this patch I change both library code and user sites at exofs, to make it easy on integration. It should be submitted via James's scsi-misc tree.] Signed-off-by: Boaz Harrosh CC: Jeff Garzik Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 23 +++++++++++++---------- fs/exofs/inode.c | 5 +++-- include/scsi/osd_initiator.h | 4 ++-- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index ba2ebae305c..3f5ec578e6c 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -779,13 +779,14 @@ EXPORT_SYMBOL(osd_req_remove_object); */ void osd_req_write(struct osd_request *or, - const struct osd_obj_id *obj, struct bio *bio, u64 offset) + const struct osd_obj_id *obj, u64 offset, + struct bio *bio, u64 len) { - _osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, bio->bi_size); + _osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len); WARN_ON(or->out.bio || or->out.total_bytes); - bio->bi_rw |= (1 << BIO_RW); + WARN_ON(0 == bio_rw_flagged(bio, BIO_RW)); or->out.bio = bio; - or->out.total_bytes = bio->bi_size; + or->out.total_bytes = len; } EXPORT_SYMBOL(osd_req_write); @@ -798,7 +799,8 @@ int osd_req_write_kern(struct osd_request *or, if (IS_ERR(bio)) return PTR_ERR(bio); - osd_req_write(or, obj, bio, offset); + bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ + osd_req_write(or, obj, offset, bio, len); return 0; } EXPORT_SYMBOL(osd_req_write_kern); @@ -828,13 +830,14 @@ void osd_req_flush_object(struct osd_request *or, EXPORT_SYMBOL(osd_req_flush_object); void osd_req_read(struct osd_request *or, - const struct osd_obj_id *obj, struct bio *bio, u64 offset) + const struct osd_obj_id *obj, u64 offset, + struct bio *bio, u64 len) { - _osd_req_encode_common(or, OSD_ACT_READ, obj, offset, bio->bi_size); + _osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len); WARN_ON(or->in.bio || or->in.total_bytes); - bio->bi_rw &= ~(1 << BIO_RW); + WARN_ON(1 == bio_rw_flagged(bio, BIO_RW)); or->in.bio = bio; - or->in.total_bytes = bio->bi_size; + or->in.total_bytes = len; } EXPORT_SYMBOL(osd_req_read); @@ -847,7 +850,7 @@ int osd_req_read_kern(struct osd_request *or, if (IS_ERR(bio)) return PTR_ERR(bio); - osd_req_read(or, obj, bio, offset); + osd_req_read(or, obj, offset, bio, len); return 0; } EXPORT_SYMBOL(osd_req_read_kern); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ba8d9fab469..f79e8e58c3a 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -266,7 +266,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync) goto err; } - osd_req_read(or, &obj, pcol->bio, i_start); + osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); if (is_sync) { exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); @@ -522,7 +522,8 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; - osd_req_write(or, &obj, pcol_copy->bio, i_start); + pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ + osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); if (unlikely(ret)) { EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 6132790d678..8c1e3b804af 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -363,7 +363,7 @@ void osd_req_create_object(struct osd_request *or, struct osd_obj_id *); void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *); void osd_req_write(struct osd_request *or, - const struct osd_obj_id *, struct bio *data_out, u64 offset); + const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len); int osd_req_write_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); void osd_req_append(struct osd_request *or, @@ -380,7 +380,7 @@ void osd_req_flush_object(struct osd_request *or, /*V2*/ u64 offset, /*V2*/ u64 len); void osd_req_read(struct osd_request *or, - const struct osd_obj_id *, struct bio *data_in, u64 offset); + const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len); int osd_req_read_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); -- cgit v1.2.3-70-g09d2 From fc2fac5b5f11e2bee3bf37215c8746236f5ea188 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:04:43 +0300 Subject: [SCSI] libosd: Define an osd_dev wrapper to retrieve the request_queue libosd users that need to work with bios, must sometime use the request_queue associated with the osd_dev. Make a wrapper for that, and convert all in-tree users. Signed-off-by: Boaz Harrosh Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_initiator.c | 6 +++--- fs/exofs/inode.c | 3 +-- include/scsi/osd_initiator.h | 5 +++++ 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 3f5ec578e6c..3959797149f 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -670,7 +670,7 @@ static int _osd_req_list_objects(struct osd_request *or, __be16 action, const struct osd_obj_id *obj, osd_id initial_id, struct osd_obj_id_list *list, unsigned nelem) { - struct request_queue *q = or->osd_dev->scsi_device->request_queue; + struct request_queue *q = osd_request_queue(or->osd_dev); u64 len = nelem * sizeof(osd_id) + sizeof(*list); struct bio *bio; @@ -793,7 +793,7 @@ EXPORT_SYMBOL(osd_req_write); int osd_req_write_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) { - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; + struct request_queue *req_q = osd_request_queue(or->osd_dev); struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); if (IS_ERR(bio)) @@ -844,7 +844,7 @@ EXPORT_SYMBOL(osd_req_read); int osd_req_read_kern(struct osd_request *or, const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) { - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; + struct request_queue *req_q = osd_request_queue(or->osd_dev); struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); if (IS_ERR(bio)) diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index f79e8e58c3a..77d0a295eb1 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, struct inode *inode) { struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; - struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; pcol->sbi = sbi; - pcol->req_q = req_q; + pcol->req_q = osd_request_queue(sbi->s_dev); pcol->inode = inode; pcol->expected_pages = expected_pages; diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index 8c1e3b804af..b44dc53bd88 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -18,6 +18,7 @@ #include "osd_types.h" #include +#include /* Note: "NI" in comments below means "Not Implemented yet" */ @@ -69,6 +70,10 @@ void osd_dev_fini(struct osd_dev *od); /* some hi level device operations */ int osd_auto_detect_ver(struct osd_dev *od, void *caps); /* GFP_KERNEL */ +static inline struct request_queue *osd_request_queue(struct osd_dev *od) +{ + return od->scsi_device->request_queue; +} /* we might want to use function vector in the future */ static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v) -- cgit v1.2.3-70-g09d2 From 021e2230d6c04d80289fceb2d21c9ce93a615b32 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 24 May 2009 20:05:05 +0300 Subject: [SCSI] osduld: use filp_open() when looking up an osd-device This patch was inspired by Al Viro, for simplifying and fixing the retrieval of osd-devices by in-kernel users, eg: file systems. In-Kernel users, now, go through the same path user-mode does by opening a file on the osd char-device and though holding a reference to both the device and the Module. A file pointer was added to the osd_dev structure which is now allocated for each user. The internal osd_dev is no longer exposed outside of the uld. I wanted to do that for a long time so each libosd user can have his own defaults on the device. The API is left the same, so user code need not change. It is no longer needed to open/close a file handle on the osd char-device from user-mode, before mounting an exofs on it. Signed-off-by: Boaz Harrosh CC: Al Viro Signed-off-by: James Bottomley --- drivers/scsi/osd/osd_uld.c | 66 ++++++++++++++++++++------------------------ include/scsi/osd_initiator.h | 1 + 2 files changed, 31 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 22b59e13ba8..0bdef339090 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -175,10 +176,9 @@ static const struct file_operations osd_fops = { struct osd_dev *osduld_path_lookup(const char *name) { - struct path path; - struct inode *inode; - struct cdev *cdev; - struct osd_uld_device *uninitialized_var(oud); + struct osd_uld_device *oud; + struct osd_dev *od; + struct file *file; int error; if (!name || !*name) { @@ -186,52 +186,46 @@ struct osd_dev *osduld_path_lookup(const char *name) return ERR_PTR(-EINVAL); } - error = kern_path(name, LOOKUP_FOLLOW, &path); - if (error) { - OSD_ERR("path_lookup of %s failed=>%d\n", name, error); - return ERR_PTR(error); - } + od = kzalloc(sizeof(*od), GFP_KERNEL); + if (!od) + return ERR_PTR(-ENOMEM); - inode = path.dentry->d_inode; - error = -EINVAL; /* Not the right device e.g osd_uld_device */ - if (!S_ISCHR(inode->i_mode)) { - OSD_DEBUG("!S_ISCHR()\n"); - goto out; + file = filp_open(name, O_RDWR, 0); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto free_od; } - cdev = inode->i_cdev; - if (!cdev) { - OSD_ERR("Before mounting an OSD Based filesystem\n"); - OSD_ERR(" user-mode must open+close the %s device\n", name); - OSD_ERR(" Example: bash: echo < %s\n", name); - goto out; + if (file->f_op != &osd_fops){ + error = -EINVAL; + goto close_file; } - /* The Magic wand. Is it our char-dev */ - /* TODO: Support sg devices */ - if (cdev->owner != THIS_MODULE) { - OSD_ERR("Error mounting %s - is not an OSD device\n", name); - goto out; - } + oud = file->private_data; - oud = container_of(cdev, struct osd_uld_device, cdev); + *od = oud->od; + od->file = file; - __uld_get(oud); - error = 0; + return od; -out: - path_put(&path); - return error ? ERR_PTR(error) : &oud->od; +close_file: + fput(file); +free_od: + kfree(od); + return ERR_PTR(error); } EXPORT_SYMBOL(osduld_path_lookup); void osduld_put_device(struct osd_dev *od) { - if (od) { - struct osd_uld_device *oud = container_of(od, - struct osd_uld_device, od); - __uld_put(oud); + if (od && !IS_ERR(od)) { + struct osd_uld_device *oud = od->file->private_data; + + BUG_ON(od->scsi_device != oud->od.scsi_device); + + fput(od->file); + kfree(od); } } EXPORT_SYMBOL(osduld_put_device); diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index b44dc53bd88..02bd9f71635 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -48,6 +48,7 @@ enum osd_std_version { */ struct osd_dev { struct scsi_device *scsi_device; + struct file *file; unsigned def_timeout; #ifdef OSD_VER1_SUPPORT -- cgit v1.2.3-70-g09d2 From bd2b5b12849a3446abad0b25e920f86f5480b309 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 13:40:57 +0200 Subject: perf_counter: More aggressive frequency adjustment Also employ the overflow handler to adjust the frequency, this results in a stable frequency in about 40~50 samples, instead of that many ticks. This also means we can start sampling at a sample period of 1 without running head-first into the throttle. It relies on sched_clock() to accurately measure the time difference between the overflow NMIs. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 5 +- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 130 +++++++++++++++++++++++++------------ 3 files changed, 92 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 49f258537cb..240ca563063 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -696,10 +696,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (!attr->exclude_kernel) hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - if (!hwc->sample_period) + if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } - atomic64_set(&hwc->period_left, hwc->sample_period); counter->destroy = hw_perf_counter_destroy; /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3586df840f6..282d8cc4898 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -371,6 +371,7 @@ struct hw_perf_counter { u64 freq_count; u64 freq_interrupts; + u64 freq_stamp; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 5eacaaf3f9c..51c571ee4d0 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) static void perf_log_throttle(struct perf_counter *counter, int enable); static void perf_log_period(struct perf_counter *counter, u64 period); -static void perf_adjust_freq(struct perf_counter_context *ctx) +static void perf_adjust_period(struct perf_counter *counter, u64 events) +{ + struct hw_perf_counter *hwc = &counter->hw; + u64 period, sample_period; + s64 delta; + + events *= hwc->sample_period; + period = div64_u64(events, counter->attr.sample_freq); + + delta = (s64)(period - hwc->sample_period); + delta = (delta + 7) / 8; /* low pass filter */ + + sample_period = hwc->sample_period + delta; + + if (!sample_period) + sample_period = 1; + + perf_log_period(counter, sample_period); + + hwc->sample_period = sample_period; +} + +static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) { struct perf_counter *counter; struct hw_perf_counter *hwc; - u64 interrupts, sample_period; - u64 events, period, freq; - s64 delta; + u64 interrupts, freq; spin_lock(&ctx->lock); list_for_each_entry(counter, &ctx->counter_list, list_entry) { @@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) interrupts = hwc->interrupts; hwc->interrupts = 0; + /* + * unthrottle counters on the tick + */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(counter, 1); counter->pmu->unthrottle(counter); @@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) if (!counter->attr.freq || !counter->attr.sample_freq) continue; + /* + * if the specified freq < HZ then we need to skip ticks + */ if (counter->attr.sample_freq < HZ) { freq = counter->attr.sample_freq; @@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) } else freq = HZ; - events = freq * interrupts * hwc->sample_period; - period = div64_u64(events, counter->attr.sample_freq); - - delta = (s64)(1 + period - hwc->sample_period); - delta >>= 1; - - sample_period = hwc->sample_period + delta; - - if (!sample_period) - sample_period = 1; + perf_adjust_period(counter, freq * interrupts); - perf_log_period(counter, sample_period); - - hwc->sample_period = sample_period; + /* + * In order to avoid being stalled by an (accidental) huge + * sample period, force reset the sample period if we didn't + * get any events in this freq period. + */ + if (!interrupts) { + perf_disable(); + counter->pmu->disable(counter); + atomic_set(&hwc->period_left, 0); + counter->pmu->enable(counter); + perf_enable(); + } } spin_unlock(&ctx->lock); } @@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = curr->perf_counter_ctxp; - perf_adjust_freq(&cpuctx->ctx); + perf_ctx_adjust_freq(&cpuctx->ctx); if (ctx) - perf_adjust_freq(ctx); + perf_ctx_adjust_freq(ctx); perf_counter_cpu_sched_out(cpuctx); if (ctx) @@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) counter->attr.sample_freq = value; } else { + perf_log_period(counter, value); + counter->attr.sample_period = value; counter->hw.sample_period = value; - - perf_log_period(counter, value); } unlock: spin_unlock_irq(&ctx->lock); @@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma) * event flow. */ +struct freq_event { + struct perf_event_header header; + u64 time; + u64 id; + u64 period; +}; + static void perf_log_period(struct perf_counter *counter, u64 period) { struct perf_output_handle handle; + struct freq_event event; int ret; - struct { - struct perf_event_header header; - u64 time; - u64 id; - u64 period; - } freq_event = { + if (counter->hw.sample_period == period) + return; + + if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) + return; + + event = (struct freq_event) { .header = { .type = PERF_EVENT_PERIOD, .misc = 0, - .size = sizeof(freq_event), + .size = sizeof(event), }, .time = sched_clock(), .id = counter->id, .period = period, }; - if (counter->hw.sample_period == period) - return; - - ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0); + ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); if (ret) return; - perf_output_put(&handle, freq_event); + perf_output_put(&handle, event); perf_output_end(&handle); } @@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter, { int events = atomic_read(&counter->event_limit); int throttle = counter->pmu->unthrottle != NULL; + struct hw_perf_counter *hwc = &counter->hw; int ret = 0; if (!throttle) { - counter->hw.interrupts++; + hwc->interrupts++; } else { - if (counter->hw.interrupts != MAX_INTERRUPTS) { - counter->hw.interrupts++; - if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) { - counter->hw.interrupts = MAX_INTERRUPTS; + if (hwc->interrupts != MAX_INTERRUPTS) { + hwc->interrupts++; + if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) { + hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(counter, 0); ret = 1; } @@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter, } } + if (counter->attr.freq) { + u64 now = sched_clock(); + s64 delta = now - hwc->freq_stamp; + + hwc->freq_stamp = now; + + if (delta > 0 && delta < TICK_NSEC) + perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); + } + /* * XXX event_limit might not quite work as expected on inherited * counters @@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) return NULL; counter->destroy = tp_perf_counter_destroy; - counter->hw.sample_period = counter->attr.sample_period; return &perf_ops_generic; } @@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr, pmu = NULL; hwc = &counter->hw; + hwc->sample_period = attr->sample_period; if (attr->freq && attr->sample_freq) - hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq); - else - hwc->sample_period = attr->sample_period; + hwc->sample_period = 1; + + atomic64_set(&hwc->period_left, hwc->sample_period); /* * we currently do not support PERF_SAMPLE_GROUP on inherited counters @@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter, else child_counter->state = PERF_COUNTER_STATE_OFF; + if (parent_counter->attr.freq) + child_counter->hw.sample_period = parent_counter->hw.sample_period; + /* * Link it up in the child's context: */ -- cgit v1.2.3-70-g09d2 From 4d1d49858c0a5a4fb1be4bc7972754cd640245ba Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jun 2009 21:57:03 +0200 Subject: net/libertas: remove GPIO-CS handling in SPI interface code This removes the dependency on GPIO framework and lets the SPI host driver handle the chip select. The SPI host driver is required to keep the CS active for the entire message unless cs_change says otherwise. This patch collects the two/three single SPI transfers into a message. Also the delay in read path in case use_dummy_writes are not used is moved into the SPI host driver. Tested-by: Mike Rapoport Tested-by: Andrey Yurovsky Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dan Williams Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 2 +- drivers/net/wireless/libertas/if_spi.c | 92 ++++++++++++++++------------------ include/linux/spi/libertas_spi.h | 3 -- 3 files changed, 45 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index daf4c805be5..fb7541c28e5 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -153,7 +153,7 @@ config LIBERTAS_SDIO config LIBERTAS_SPI tristate "Marvell Libertas 8686 SPI 802.11b/g cards" - depends on LIBERTAS && SPI && GENERIC_GPIO + depends on LIBERTAS && SPI ---help--- A driver for Marvell Libertas 8686 SPI devices. diff --git a/drivers/net/wireless/libertas/if_spi.c b/drivers/net/wireless/libertas/if_spi.c index ea23c5de142..f8c2898d82b 100644 --- a/drivers/net/wireless/libertas/if_spi.c +++ b/drivers/net/wireless/libertas/if_spi.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -51,13 +50,6 @@ struct if_spi_card { u16 card_id; u8 card_rev; - /* Pin number for our GPIO chip-select. */ - /* TODO: Once the generic SPI layer has some additional features, we - * should take this out and use the normal chip select here. - * We need support for chip select delays, and not dropping chipselect - * after each word. */ - int gpio_cs; - /* The last time that we initiated an SPU operation */ unsigned long prev_xfer_time; @@ -130,12 +122,10 @@ static void spu_transaction_init(struct if_spi_card *card) * If not, we have to busy-wait to be on the safe side. */ ndelay(400); } - gpio_set_value(card->gpio_cs, 0); /* assert CS */ } static void spu_transaction_finish(struct if_spi_card *card) { - gpio_set_value(card->gpio_cs, 1); /* drop CS */ card->prev_xfer_time = jiffies; } @@ -145,6 +135,13 @@ static int spu_write(struct if_spi_card *card, u16 reg, const u8 *buf, int len) { int err = 0; u16 reg_out = cpu_to_le16(reg | IF_SPI_WRITE_OPERATION_MASK); + struct spi_message m; + struct spi_transfer reg_trans; + struct spi_transfer data_trans; + + spi_message_init(&m); + memset(®_trans, 0, sizeof(reg_trans)); + memset(&data_trans, 0, sizeof(data_trans)); /* You must give an even number of bytes to the SPU, even if it * doesn't care about the last one. */ @@ -153,13 +150,16 @@ static int spu_write(struct if_spi_card *card, u16 reg, const u8 *buf, int len) spu_transaction_init(card); /* write SPU register index */ - err = spi_write(card->spi, (u8 *)®_out, sizeof(u16)); - if (err) - goto out; + reg_trans.tx_buf = ®_out; + reg_trans.len = sizeof(reg_out); - err = spi_write(card->spi, buf, len); + data_trans.tx_buf = buf; + data_trans.len = len; -out: + spi_message_add_tail(®_trans, &m); + spi_message_add_tail(&data_trans, &m); + + err = spi_sync(card->spi, &m); spu_transaction_finish(card); return err; } @@ -186,10 +186,13 @@ static inline int spu_reg_is_port_reg(u16 reg) static int spu_read(struct if_spi_card *card, u16 reg, u8 *buf, int len) { - unsigned int i, delay; + unsigned int delay; int err = 0; - u16 zero = 0; u16 reg_out = cpu_to_le16(reg | IF_SPI_READ_OPERATION_MASK); + struct spi_message m; + struct spi_transfer reg_trans; + struct spi_transfer dummy_trans; + struct spi_transfer data_trans; /* You must take an even number of bytes from the SPU, even if you * don't care about the last one. */ @@ -197,29 +200,34 @@ static int spu_read(struct if_spi_card *card, u16 reg, u8 *buf, int len) spu_transaction_init(card); + spi_message_init(&m); + memset(®_trans, 0, sizeof(reg_trans)); + memset(&dummy_trans, 0, sizeof(dummy_trans)); + memset(&data_trans, 0, sizeof(data_trans)); + /* write SPU register index */ - err = spi_write(card->spi, (u8 *)®_out, sizeof(u16)); - if (err) - goto out; + reg_trans.tx_buf = ®_out; + reg_trans.len = sizeof(reg_out); + spi_message_add_tail(®_trans, &m); delay = spu_reg_is_port_reg(reg) ? card->spu_port_delay : card->spu_reg_delay; if (card->use_dummy_writes) { /* Clock in dummy cycles while the SPU fills the FIFO */ - for (i = 0; i < delay / 16; ++i) { - err = spi_write(card->spi, (u8 *)&zero, sizeof(u16)); - if (err) - return err; - } + dummy_trans.len = delay / 8; + spi_message_add_tail(&dummy_trans, &m); } else { /* Busy-wait while the SPU fills the FIFO */ - ndelay(100 + (delay * 10)); + reg_trans.delay_usecs = + DIV_ROUND_UP((100 + (delay * 10)), 1000); } /* read in data */ - err = spi_read(card->spi, buf, len); + data_trans.rx_buf = buf; + data_trans.len = len; + spi_message_add_tail(&data_trans, &m); -out: + err = spi_sync(card->spi, &m); spu_transaction_finish(card); return err; } @@ -1049,7 +1057,6 @@ static int __devinit if_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, card); card->pdata = pdata; card->spi = spi; - card->gpio_cs = pdata->gpio_cs; card->prev_xfer_time = jiffies; sema_init(&card->spi_ready, 0); @@ -1058,26 +1065,18 @@ static int __devinit if_spi_probe(struct spi_device *spi) INIT_LIST_HEAD(&card->data_packet_list); spin_lock_init(&card->buffer_lock); - /* set up GPIO CS line. TODO: use regular CS line */ - err = gpio_request(card->gpio_cs, "if_spi_gpio_chip_select"); - if (err) - goto free_card; - err = gpio_direction_output(card->gpio_cs, 1); - if (err) - goto free_gpio; - /* Initialize the SPI Interface Unit */ err = spu_init(card, pdata->use_dummy_writes); if (err) - goto free_gpio; + goto free_card; err = spu_get_chip_revision(card, &card->card_id, &card->card_rev); if (err) - goto free_gpio; + goto free_card; /* Firmware load */ err = spu_read_u32(card, IF_SPI_SCRATCH_4_REG, &scratch); if (err) - goto free_gpio; + goto free_card; if (scratch == SUCCESSFUL_FW_DOWNLOAD_MAGIC) lbs_deb_spi("Firmware is already loaded for " "Marvell WLAN 802.11 adapter\n"); @@ -1085,7 +1084,7 @@ static int __devinit if_spi_probe(struct spi_device *spi) err = if_spi_calculate_fw_names(card->card_id, card->helper_fw_name, card->main_fw_name); if (err) - goto free_gpio; + goto free_card; lbs_deb_spi("Initializing FW for Marvell WLAN 802.11 adapter " "(chip_id = 0x%04x, chip_rev = 0x%02x) " @@ -1096,23 +1095,23 @@ static int __devinit if_spi_probe(struct spi_device *spi) spi->max_speed_hz); err = if_spi_prog_helper_firmware(card); if (err) - goto free_gpio; + goto free_card; err = if_spi_prog_main_firmware(card); if (err) - goto free_gpio; + goto free_card; lbs_deb_spi("loaded FW for Marvell WLAN 802.11 adapter\n"); } err = spu_set_interrupt_mode(card, 0, 1); if (err) - goto free_gpio; + goto free_card; /* Register our card with libertas. * This will call alloc_etherdev */ priv = lbs_add_card(card, &spi->dev); if (!priv) { err = -ENOMEM; - goto free_gpio; + goto free_card; } card->priv = priv; priv->card = card; @@ -1157,8 +1156,6 @@ terminate_thread: if_spi_terminate_spi_thread(card); remove_card: lbs_remove_card(priv); /* will call free_netdev */ -free_gpio: - gpio_free(card->gpio_cs); free_card: free_if_spi_card(card); out: @@ -1179,7 +1176,6 @@ static int __devexit libertas_spi_remove(struct spi_device *spi) free_irq(spi->irq, card); if_spi_terminate_spi_thread(card); lbs_remove_card(priv); /* will call free_netdev */ - gpio_free(card->gpio_cs); if (card->pdata->teardown) card->pdata->teardown(spi); free_if_spi_card(card); diff --git a/include/linux/spi/libertas_spi.h b/include/linux/spi/libertas_spi.h index 79506f5f9e6..1b5d5384fcd 100644 --- a/include/linux/spi/libertas_spi.h +++ b/include/linux/spi/libertas_spi.h @@ -22,9 +22,6 @@ struct libertas_spi_platform_data { * speed, you may want to use 0 here. */ u16 use_dummy_writes; - /* GPIO number to use as chip select */ - u16 gpio_cs; - /* Board specific setup/teardown */ int (*setup)(struct spi_device *spi); int (*teardown)(struct spi_device *spi); -- cgit v1.2.3-70-g09d2 From 8f77f3849cc3ae2d6df9301785a3d316ea7d7ee1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 7 Jun 2009 21:58:37 +0200 Subject: mac80211: do not pass PS frames out of mac80211 again In order to handle powersave frames properly we had needed to pass these out to the device queues again, and introduce the skb->requeue bit. This, however, also has unnecessary overhead by needing to 'clean up' already tried frames, and this clean-up code is also buggy when software encryption is used. Instead of sending the frames via the master netdev queue again, simply put them into the pending queue. This also fixes a problem where frames for that particular station could be reordered when some were still on the software queues and older ones are re-injected into the software queue after them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/skbuff.h | 4 --- include/net/mac80211.h | 3 +++ net/core/skbuff.c | 1 - net/mac80211/ieee80211_i.h | 5 ++++ net/mac80211/main.c | 61 +++++----------------------------------------- net/mac80211/rx.c | 25 +++++++------------ net/mac80211/tx.c | 3 ++- net/mac80211/util.c | 46 ++++++++++++++++++++++++++++++++++ net/mac80211/wme.c | 2 +- 9 files changed, 72 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f1c93b878b3..fa51293f270 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -304,9 +304,6 @@ typedef unsigned char *sk_buff_data_t; * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) * @do_not_encrypt: set to prevent encryption of this frame - * @requeue: set to indicate that the wireless core should attempt - * a software retry on this frame if we failed to - * receive an ACK for it * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -380,7 +377,6 @@ struct sk_buff { #endif #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) __u8 do_not_encrypt:1; - __u8 requeue:1; #endif /* 0/13/14 bit hole */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 17d61d19d91..c0610447697 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -239,6 +239,8 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211, * used to indicate that a pending frame requires TX processing before * it can be sent out. + * @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211, + * used to indicate that a frame was already retried due to PS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -256,6 +258,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), IEEE80211_TX_INTFL_RCALGO = BIT(13), IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), + IEEE80211_TX_INTFL_RETRIED = BIT(15), }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 49961ba3c0f..b94d777e3eb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -552,7 +552,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) new->do_not_encrypt = old->do_not_encrypt; - new->requeue = old->requeue; #endif skb_copy_secmark(new, old); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c088c46704a..4dbc2896419 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -589,6 +589,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_PENDING, + IEEE80211_QUEUE_STOP_REASON_SKB_ADD, }; struct ieee80211_master_priv { @@ -1121,6 +1122,10 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); +void ieee80211_add_pending_skb(struct ieee80211_local *local, + struct sk_buff *skb); +int ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2683df91807..092a017b237 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -369,60 +369,12 @@ static void ieee80211_tasklet_handler(unsigned long data) } } -/* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to - * make a prepared TX frame (one that has been given to hw) to look like brand - * new IEEE 802.11 frame that is ready to go through TX processing again. - */ -static void ieee80211_remove_tx_extra(struct ieee80211_local *local, - struct ieee80211_key *key, - struct sk_buff *skb) -{ - unsigned int hdrlen, iv_len, mic_len; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - - if (!key) - goto no_key; - - switch (key->conf.alg) { - case ALG_WEP: - iv_len = WEP_IV_LEN; - mic_len = WEP_ICV_LEN; - break; - case ALG_TKIP: - iv_len = TKIP_IV_LEN; - mic_len = TKIP_ICV_LEN; - break; - case ALG_CCMP: - iv_len = CCMP_HDR_LEN; - mic_len = CCMP_MIC_LEN; - break; - default: - goto no_key; - } - - if (skb->len >= hdrlen + mic_len && - !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - skb_trim(skb, skb->len - mic_len); - if (skb->len >= hdrlen + iv_len) { - memmove(skb->data + iv_len, skb->data, hdrlen); - hdr = (struct ieee80211_hdr *)skb_pull(skb, iv_len); - } - -no_key: - if (ieee80211_is_data_qos(hdr->frame_control)) { - hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); - memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data, - hdrlen - IEEE80211_QOS_CTL_LEN); - skb_pull(skb, IEEE80211_QOS_CTL_LEN); - } -} - static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + sta->tx_filtered_count++; /* @@ -464,16 +416,15 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, */ if (test_sta_flags(sta, WLAN_STA_PS) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - ieee80211_remove_tx_extra(local, sta->key, skb); skb_queue_tail(&sta->tx_filtered, skb); return; } - if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) { + if (!test_sta_flags(sta, WLAN_STA_PS) && + !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { /* Software retry the packet once */ - skb->requeue = 1; - ieee80211_remove_tx_extra(local, sta->key, skb); - dev_queue_xmit(skb); + info->flags |= IEEE80211_TX_INTFL_RETRIED; + ieee80211_add_pending_skb(local, skb); return; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 75412518510..de5bba7f910 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -797,8 +797,7 @@ static int ap_sta_ps_end(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - struct sk_buff *skb; - int sent = 0; + int sent, buffered; atomic_dec(&sdata->bss->num_sta_ps); @@ -814,22 +813,16 @@ static int ap_sta_ps_end(struct sta_info *sta) #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - sent++; - skb->requeue = 1; - dev_queue_xmit(skb); - } - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - local->total_ps_buffered--; - sent++; + sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); + buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); + sent += buffered; + local->total_ps_buffered -= buffered; + #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %pM aid %d send PS frame " - "since STA not sleeping anymore\n", sdata->dev->name, - sta->sta.addr, sta->sta.aid); + printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " + "since STA not sleeping anymore\n", sdata->dev->name, + sta->sta.addr, sta->sta.aid, sent - buffered, buffered); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - skb->requeue = 1; - dev_queue_xmit(skb); - } return sent; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1436f747531..bfaa9ce3314 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -400,6 +400,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta_info_set_tim_bit(sta); info->control.jiffies = jiffies; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; skb_queue_tail(&sta->ps_tx_buf, tx->skb); return TX_QUEUED; } @@ -420,7 +421,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) * frame filtering and keeps a station blacklist on its own * (e.g: p54), so that frames can be delivered unimpeded. * - * Note: It should be save to disable the filter now. + * Note: It should be safe to disable the filter now. * As, it is really unlikely that we still have any pending * frame for this station in the hw's buffers/fifos left, * that is not rejected with a unsuccessful tx_status yet. diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 22f63815fb3..66ce96a69f3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -341,6 +341,52 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) } EXPORT_SYMBOL(ieee80211_stop_queue); +void ieee80211_add_pending_skb(struct ieee80211_local *local, + struct sk_buff *skb) +{ + struct ieee80211_hw *hw = &local->hw; + unsigned long flags; + int queue = skb_get_queue_mapping(skb); + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_PENDING); + skb_queue_tail(&local->pending[queue], skb); + __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +int ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) +{ + struct ieee80211_hw *hw = &local->hw; + struct sk_buff *skb; + unsigned long flags; + int queue, ret = 0, i; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (i = 0; i < hw->queues; i++) + __ieee80211_stop_queue(hw, i, + IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + + while ((skb = skb_dequeue(skbs))) { + ret++; + queue = skb_get_queue_mapping(skb); + skb_queue_tail(&local->pending[queue], skb); + } + + for (i = 0; i < hw->queues; i++) { + if (ret) + __ieee80211_stop_queue(hw, i, + IEEE80211_QUEUE_STOP_REASON_PENDING); + __ieee80211_wake_queue(hw, i, + IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + } + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + return ret; +} + void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason) { diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 694343b9102..116a923b14d 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -101,7 +101,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) * Now we know the 1d priority, fill in the QoS header if * there is one (and we haven't done this before). */ - if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) { + if (ieee80211_is_data_qos(hdr->frame_control)) { u8 *p = ieee80211_get_qos_ctl(hdr); u8 ack_policy = 0; tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; -- cgit v1.2.3-70-g09d2 From b3fa1329eaf2a7b97124dacf5b663fd51346ac19 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 8 Jun 2009 13:27:27 +0100 Subject: rfkill: remove set_global_sw_state rfkill_set_global_sw_state() (previously rfkill_set_default()) will no longer be exported by the rewritten rfkill core. Instead, platform drivers which can provide persistent soft-rfkill state across power-down/reboot should indicate their initial state by calling rfkill_set_sw_state() before registration. Otherwise, they will be initialized to a default value during registration by a set_block call. We remove existing calls to rfkill_set_sw_state() which happen before registration, since these had no effect in the old model. If these drivers do have persistent state, the calls can be put back (subject to testing :-). This affects hp-wmi and acer-wmi. Drivers with persistent state will affect the global state only if rfkill-input is enabled. This is required, otherwise booting with wireless soft-blocked and pressing the wireless-toggle key once would have no apparent effect. This special case will be removed in future along with rfkill-input, in favour of a more flexible userspace daemon (see Documentation/feature-removal-schedule.txt). Now rfkill_global_states[n].def is only used to preserve global states over EPO, it is renamed to ".sav". Signed-off-by: Alan Jenkins Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- drivers/platform/x86/acer-wmi.c | 3 -- drivers/platform/x86/eeepc-laptop.c | 8 ++-- drivers/platform/x86/hp-wmi.c | 4 -- drivers/platform/x86/thinkpad_acpi.c | 31 +++++++------- include/linux/rfkill.h | 28 ++++--------- net/rfkill/core.c | 81 +++++++++++++----------------------- 6 files changed, 56 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index b618fa51db2..09a503e5da6 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -988,7 +988,6 @@ static struct rfkill *acer_rfkill_register(struct device *dev, char *name, u32 cap) { int err; - u32 state; struct rfkill *rfkill_dev; rfkill_dev = rfkill_alloc(name, dev, type, @@ -996,8 +995,6 @@ static struct rfkill *acer_rfkill_register(struct device *dev, (void *)(unsigned long)cap); if (!rfkill_dev) return ERR_PTR(-ENOMEM); - get_u32(&state, cap); - rfkill_set_sw_state(rfkill_dev, !state); err = rfkill_register(rfkill_dev); if (err) { diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 1208d0cedd1..03bf522bd7a 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -675,8 +675,8 @@ static int eeepc_hotk_add(struct acpi_device *device) if (!ehotk->eeepc_wlan_rfkill) goto wlan_fail; - rfkill_set_global_sw_state(RFKILL_TYPE_WLAN, - get_acpi(CM_ASL_WLAN) != 1); + rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill, + get_acpi(CM_ASL_WLAN) != 1); result = rfkill_register(ehotk->eeepc_wlan_rfkill); if (result) goto wlan_fail; @@ -693,8 +693,8 @@ static int eeepc_hotk_add(struct acpi_device *device) if (!ehotk->eeepc_bluetooth_rfkill) goto bluetooth_fail; - rfkill_set_global_sw_state(RFKILL_TYPE_BLUETOOTH, - get_acpi(CM_ASL_BLUETOOTH) != 1); + rfkill_set_sw_state(ehotk->eeepc_bluetooth_rfkill, + get_acpi(CM_ASL_BLUETOOTH) != 1); result = rfkill_register(ehotk->eeepc_bluetooth_rfkill); if (result) goto bluetooth_fail; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 8d931145cbf..16fffe44e33 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -422,7 +422,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) RFKILL_TYPE_WLAN, &hp_wmi_rfkill_ops, (void *) 0); - rfkill_set_sw_state(wifi_rfkill, hp_wmi_wifi_state()); err = rfkill_register(wifi_rfkill); if (err) goto register_wifi_error; @@ -433,8 +432,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) RFKILL_TYPE_BLUETOOTH, &hp_wmi_rfkill_ops, (void *) 1); - rfkill_set_sw_state(bluetooth_rfkill, - hp_wmi_bluetooth_state()); err = rfkill_register(bluetooth_rfkill); if (err) goto register_bluetooth_error; @@ -445,7 +442,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) RFKILL_TYPE_WWAN, &hp_wmi_rfkill_ops, (void *) 2); - rfkill_set_sw_state(wwan_rfkill, hp_wmi_wwan_state()); err = rfkill_register(wwan_rfkill); if (err) goto register_wwan_err; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index cfcafa4e947..86e958539f4 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1168,21 +1168,6 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]); - initial_sw_status = (tp_rfkops->get_status)(); - if (initial_sw_status < 0) { - printk(TPACPI_ERR - "failed to read initial state for %s, error %d; " - "will turn radio off\n", name, initial_sw_status); - } else { - initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF); - if (set_default) { - /* try to set the initial state as the default for the - * rfkill type, since we ask the firmware to preserve - * it across S5 in NVRAM */ - rfkill_set_global_sw_state(rfktype, initial_sw_state); - } - } - atp_rfk = kzalloc(sizeof(struct tpacpi_rfk), GFP_KERNEL); if (atp_rfk) atp_rfk->rfkill = rfkill_alloc(name, @@ -1200,8 +1185,20 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, atp_rfk->id = id; atp_rfk->ops = tp_rfkops; - rfkill_set_states(atp_rfk->rfkill, initial_sw_state, - tpacpi_rfk_check_hwblock_state()); + initial_sw_status = (tp_rfkops->get_status)(); + if (initial_sw_status < 0) { + printk(TPACPI_ERR + "failed to read initial state for %s, error %d\n", + name, initial_sw_status); + } else { + initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF); + if (set_default) { + /* try to keep the initial state, since we ask the + * firmware to preserve it across S5 in NVRAM */ + rfkill_set_sw_state(atp_rfk->rfkill, initial_sw_state); + } + } + rfkill_set_hw_state(atp_rfk->rfkill, tpacpi_rfk_check_hwblock_state()); res = rfkill_register(atp_rfk->rfkill); if (res < 0) { diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index d7e818ad0bc..c1dca0b8138 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -157,8 +157,14 @@ struct rfkill * __must_check rfkill_alloc(const char *name, * @rfkill: rfkill structure to be registered * * This function should be called by the transmitter driver to register - * the rfkill structure needs to be registered. Before calling this function - * the driver needs to be ready to service method calls from rfkill. + * the rfkill structure. Before calling this function the driver needs + * to be ready to service method calls from rfkill. + * + * If the software blocked state is not set before registration, + * set_block will be called to initialize it to a default value. + * + * If the hardware blocked state is not set before registration, + * it is assumed to be unblocked. */ int __must_check rfkill_register(struct rfkill *rfkill); @@ -250,19 +256,6 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked); */ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); -/** - * rfkill_set_global_sw_state - set global sw block default - * @type: rfkill type to set default for - * @blocked: default to set - * - * This function sets the global default -- use at boot if your platform has - * an rfkill switch. If not early enough this call may be ignored. - * - * XXX: instead of ignoring -- how about just updating all currently - * registered drivers? - */ -void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked); - /** * rfkill_blocked - query rfkill block * @@ -317,11 +310,6 @@ static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { } -static inline void rfkill_set_global_sw_state(const enum rfkill_type type, - bool blocked) -{ -} - static inline bool rfkill_blocked(struct rfkill *rfkill) { return false; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index e161ebc40a3..fa430bd03f1 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -57,6 +57,7 @@ struct rfkill { bool registered; bool suspended; + bool persistent; const struct rfkill_ops *ops; void *data; @@ -116,11 +117,9 @@ MODULE_PARM_DESC(default_state, "Default initial state for all radio types, 0 = radio off"); static struct { - bool cur, def; + bool cur, sav; } rfkill_global_states[NUM_RFKILL_TYPES]; -static unsigned long rfkill_states_default_locked; - static bool rfkill_epo_lock_active; @@ -392,7 +391,7 @@ void rfkill_epo(void) rfkill_set_block(rfkill, true); for (i = 0; i < NUM_RFKILL_TYPES; i++) { - rfkill_global_states[i].def = rfkill_global_states[i].cur; + rfkill_global_states[i].sav = rfkill_global_states[i].cur; rfkill_global_states[i].cur = true; } @@ -417,7 +416,7 @@ void rfkill_restore_states(void) rfkill_epo_lock_active = false; for (i = 0; i < NUM_RFKILL_TYPES; i++) - __rfkill_switch_all(i, rfkill_global_states[i].def); + __rfkill_switch_all(i, rfkill_global_states[i].sav); mutex_unlock(&rfkill_global_mutex); } @@ -464,29 +463,6 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type) } #endif -void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked) -{ - BUG_ON(type == RFKILL_TYPE_ALL); - - mutex_lock(&rfkill_global_mutex); - - /* don't allow unblock when epo */ - if (rfkill_epo_lock_active && !blocked) - goto out; - - /* too late */ - if (rfkill_states_default_locked & BIT(type)) - goto out; - - rfkill_states_default_locked |= BIT(type); - - rfkill_global_states[type].cur = blocked; - rfkill_global_states[type].def = blocked; - out: - mutex_unlock(&rfkill_global_mutex); -} -EXPORT_SYMBOL(rfkill_set_global_sw_state); - bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) { @@ -532,13 +508,14 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) - return blocked; + if (!rfkill->registered) { + rfkill->persistent = true; + } else { + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); - if (prev != blocked && !hwblock) - schedule_work(&rfkill->uevent_work); - - rfkill_led_trigger_event(rfkill); + rfkill_led_trigger_event(rfkill); + } return blocked; } @@ -563,13 +540,14 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) - return; - - if (swprev != sw || hwprev != hw) - schedule_work(&rfkill->uevent_work); + if (!rfkill->registered) { + rfkill->persistent = true; + } else { + if (swprev != sw || hwprev != hw) + schedule_work(&rfkill->uevent_work); - rfkill_led_trigger_event(rfkill); + rfkill_led_trigger_event(rfkill); + } } EXPORT_SYMBOL(rfkill_set_states); @@ -888,15 +866,6 @@ int __must_check rfkill_register(struct rfkill *rfkill) dev_set_name(dev, "rfkill%lu", rfkill_no); rfkill_no++; - if (!(rfkill_states_default_locked & BIT(rfkill->type))) { - /* first of its kind */ - BUILD_BUG_ON(NUM_RFKILL_TYPES > - sizeof(rfkill_states_default_locked) * 8); - rfkill_states_default_locked |= BIT(rfkill->type); - rfkill_global_states[rfkill->type].cur = - rfkill_global_states[rfkill->type].def; - } - list_add_tail(&rfkill->node, &rfkill_list); error = device_add(dev); @@ -916,7 +885,17 @@ int __must_check rfkill_register(struct rfkill *rfkill) if (rfkill->ops->poll) schedule_delayed_work(&rfkill->poll_work, round_jiffies_relative(POLL_INTERVAL)); - schedule_work(&rfkill->sync_work); + + if (!rfkill->persistent || rfkill_epo_lock_active) { + schedule_work(&rfkill->sync_work); + } else { +#ifdef CONFIG_RFKILL_INPUT + bool soft_blocked = !!(rfkill->state & RFKILL_BLOCK_SW); + + if (!atomic_read(&rfkill_input_disabled)) + __rfkill_switch_all(rfkill->type, soft_blocked); +#endif + } rfkill_send_events(rfkill, RFKILL_OP_ADD); @@ -1193,7 +1172,7 @@ static int __init rfkill_init(void) int i; for (i = 0; i < NUM_RFKILL_TYPES; i++) - rfkill_global_states[i].def = !rfkill_default_state; + rfkill_global_states[i].cur = !rfkill_default_state; error = class_register(&rfkill_class); if (error) -- cgit v1.2.3-70-g09d2 From 908209c160da8ecb68052111972b7a21310eac3f Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 8 Jun 2009 13:12:23 +0100 Subject: rfkill: don't impose global states on resume (just restore the previous states) Once rfkill-input is disabled, the "global" states will only be used as default initial states. Since the states will always be the same after resume, we shouldn't generate events on resume. Signed-off-by: Alan Jenkins Signed-off-by: John W. Linville --- include/linux/rfkill.h | 7 ++++--- net/rfkill/core.c | 6 +----- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c1dca0b8138..16e39c7a67f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -212,7 +212,7 @@ void rfkill_destroy(struct rfkill *rfkill); * * rfkill drivers that get events when the hard-blocked state changes * use this function to notify the rfkill core (and through that also - * userspace) of the current state -- they should also use this after + * userspace) of the current state. They should also use this after * resume if the state could have changed. * * You need not (but may) call this function if poll_state is assigned. @@ -234,8 +234,9 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); * rfkill drivers that get events when the soft-blocked state changes * (yes, some platforms directly act on input but allow changing again) * use this function to notify the rfkill core (and through that also - * userspace) of the current state -- they should also use this after - * resume if the state could have changed. + * userspace) of the current state. It is not necessary to notify on + * resume; since hibernation can always change the soft-blocked state, + * the rfkill core will unconditionally restore the previous state. * * This function can be called in any context, even from within rfkill * callbacks. diff --git a/net/rfkill/core.c b/net/rfkill/core.c index fa430bd03f1..4e68ab439d5 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -728,15 +728,11 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; - mutex_lock(&rfkill_global_mutex); - cur = rfkill_global_states[rfkill->type].cur; + cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); - mutex_unlock(&rfkill_global_mutex); rfkill->suspended = false; - schedule_work(&rfkill->uevent_work); - rfkill_resume_polling(rfkill); return 0; -- cgit v1.2.3-70-g09d2 From 6ff9a64d2aaa6eae396adc95e9c91c0cbfa6dbe4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Jun 2009 14:28:34 -0400 Subject: tracing: do not translate event helper macros in print format By moving the macro that creates the print format code above the defining of the event macro helpers (__get_str, __print_symbolic, and __get_dynamic_array), we get a little cleaner print format. Instead of: (char *)((void *)REC + REC->__data_loc_name) we get: __get_str(name) Instead of: ({ static const struct trace_print_flags symbols[] = { { HI_SOFTIRQ, "HI" }, { we get: __print_symbolic(REC->vec, { HI_SOFTIRQ, "HI" }, { Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 158 +++++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 40ede4db4d8..1867553c61e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -80,6 +80,87 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ + if (!ret) \ + return 0; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __entry +#define __entry REC + +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 3 of the trace events. * @@ -179,83 +260,6 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), \ - __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ - if (!ret) \ - return 0; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __entry -#define __entry REC - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ -- cgit v1.2.3-70-g09d2 From d9c7d394a8ebacb60097b192939ae9f15235225e Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Wed, 10 Jun 2009 12:57:06 -0700 Subject: block: prevent possible io_context->refcount overflow Currently io_context has an atomic_t(32-bit) as refcount. In the case of cfq, for each device against whcih a task does I/O, a reference to the io_context would be taken. And when there are multiple process sharing io_contexts(CLONE_IO) would also have a reference to the same io_context. Theoretically the possible maximum number of processes sharing the same io_context + the number of disks/cfq_data referring to the same io_context can overflow the 32-bit counter on a very high-end machine. Even though it is an improbable case, let us make it atomic_long_t. Signed-off-by: Nikanth Karthikesan Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-ioc.c | 12 ++++++------ block/cfq-iosched.c | 2 +- include/linux/iocontext.h | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 012f065ac8e..d4ed6000147 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -35,9 +35,9 @@ int put_io_context(struct io_context *ioc) if (ioc == NULL) return 1; - BUG_ON(atomic_read(&ioc->refcount) == 0); + BUG_ON(atomic_long_read(&ioc->refcount) == 0); - if (atomic_dec_and_test(&ioc->refcount)) { + if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); @@ -90,7 +90,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); if (ret) { - atomic_set(&ret->refcount, 1); + atomic_long_set(&ret->refcount, 1); atomic_set(&ret->nr_tasks, 1); spin_lock_init(&ret->lock); ret->ioprio_changed = 0; @@ -151,7 +151,7 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node) ret = current_io_context(gfp_flags, node); if (unlikely(!ret)) break; - } while (!atomic_inc_not_zero(&ret->refcount)); + } while (!atomic_long_inc_not_zero(&ret->refcount)); return ret; } @@ -163,8 +163,8 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc) struct io_context *dst = *pdst; if (src) { - BUG_ON(atomic_read(&src->refcount) == 0); - atomic_inc(&src->refcount); + BUG_ON(atomic_long_read(&src->refcount) == 0); + atomic_long_inc(&src->refcount); put_io_context(dst); *pdst = src; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 99ac4304d71..ef2f72d4243 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1282,7 +1282,7 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (!cfqd->active_cic) { struct cfq_io_context *cic = RQ_CIC(rq); - atomic_inc(&cic->ioc->refcount); + atomic_long_inc(&cic->ioc->refcount); cfqd->active_cic = cic; } } diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 08b987bccf8..dd05434fa45 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -64,7 +64,7 @@ struct cfq_io_context { * and kmalloc'ed. These could be shared between processes. */ struct io_context { - atomic_t refcount; + atomic_long_t refcount; atomic_t nr_tasks; /* all the fields below are protected by this lock */ @@ -91,8 +91,8 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) * if ref count is zero, don't allow sharing (ioc is going away, it's * a race). */ - if (ioc && atomic_inc_not_zero(&ioc->refcount)) { - atomic_inc(&ioc->nr_tasks); + if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) { + atomic_long_inc(&ioc->refcount); return ioc; } -- cgit v1.2.3-70-g09d2 From b43d65f7e818485664037a46367cfb15af05bd8c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Jun 2009 08:11:42 +0100 Subject: [ARM] 5546/1: ARM PL022 SSP/SPI driver v3 This adds a driver for the ARM PL022 PrimeCell SSP/SPI driver found in the U300 platforms as well as in some ARM reference hardware, and in a modified version on the Nomadik board. Reviewed-by: Alessandro Rubini Reviewed-by: Russell King Reviewed-by: Baruch Siach Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/spi/Kconfig | 9 + drivers/spi/Makefile | 1 + drivers/spi/amba-pl022.c | 1866 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/amba/pl022.h | 264 +++++++ 4 files changed, 2140 insertions(+) create mode 100644 drivers/spi/amba-pl022.c create mode 100644 include/linux/amba/pl022.h (limited to 'include') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 7c61251bea6..8e7c17e4461 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -171,6 +171,15 @@ config SPI_ORION help This enables using the SPI master controller on the Orion chips. +config SPI_PL022 + tristate "ARM AMBA PL022 SSP controller (EXPERIMENTAL)" + depends on ARM_AMBA && EXPERIMENTAL + default y if MACH_U300 + help + This selects the ARM(R) AMBA(R) PrimeCell PL022 SSP + controller. If you have an embedded system with an AMBA(R) + bus and a PL022 controller, say Y or M here. + config SPI_PXA2XX tristate "PXA2xx SSP SPI master" depends on ARCH_PXA && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 5d0451936d8..ecfadb18048 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o obj-$(CONFIG_SPI_OMAP_UWIRE) += omap_uwire.o obj-$(CONFIG_SPI_OMAP24XX) += omap2_mcspi.o obj-$(CONFIG_SPI_ORION) += orion_spi.o +obj-$(CONFIG_SPI_PL022) += amba-pl022.o obj-$(CONFIG_SPI_MPC52xx_PSC) += mpc52xx_psc_spi.o obj-$(CONFIG_SPI_MPC83xx) += spi_mpc83xx.o obj-$(CONFIG_SPI_S3C24XX_GPIO) += spi_s3c24xx_gpio.o diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c new file mode 100644 index 00000000000..da76797ce8b --- /dev/null +++ b/drivers/spi/amba-pl022.c @@ -0,0 +1,1866 @@ +/* + * drivers/spi/amba-pl022.c + * + * A driver for the ARM PL022 PrimeCell SSP/SPI bus master. + * + * Copyright (C) 2008-2009 ST-Ericsson AB + * Copyright (C) 2006 STMicroelectronics Pvt. Ltd. + * + * Author: Linus Walleij + * + * Initial version inspired by: + * linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c + * Initial adoption to PL022 by: + * Sachin Verma + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * TODO: + * - add timeout on polled transfers + * - add generic DMA framework support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This macro is used to define some register default values. + * reg is masked with mask, the OR:ed with an (again masked) + * val shifted sb steps to the left. + */ +#define SSP_WRITE_BITS(reg, val, mask, sb) \ + ((reg) = (((reg) & ~(mask)) | (((val)<<(sb)) & (mask)))) + +/* + * This macro is also used to define some default values. + * It will just shift val by sb steps to the left and mask + * the result with mask. + */ +#define GEN_MASK_BITS(val, mask, sb) \ + (((val)<<(sb)) & (mask)) + +#define DRIVE_TX 0 +#define DO_NOT_DRIVE_TX 1 + +#define DO_NOT_QUEUE_DMA 0 +#define QUEUE_DMA 1 + +#define RX_TRANSFER 1 +#define TX_TRANSFER 2 + +/* + * Macros to access SSP Registers with their offsets + */ +#define SSP_CR0(r) (r + 0x000) +#define SSP_CR1(r) (r + 0x004) +#define SSP_DR(r) (r + 0x008) +#define SSP_SR(r) (r + 0x00C) +#define SSP_CPSR(r) (r + 0x010) +#define SSP_IMSC(r) (r + 0x014) +#define SSP_RIS(r) (r + 0x018) +#define SSP_MIS(r) (r + 0x01C) +#define SSP_ICR(r) (r + 0x020) +#define SSP_DMACR(r) (r + 0x024) +#define SSP_ITCR(r) (r + 0x080) +#define SSP_ITIP(r) (r + 0x084) +#define SSP_ITOP(r) (r + 0x088) +#define SSP_TDR(r) (r + 0x08C) + +#define SSP_PID0(r) (r + 0xFE0) +#define SSP_PID1(r) (r + 0xFE4) +#define SSP_PID2(r) (r + 0xFE8) +#define SSP_PID3(r) (r + 0xFEC) + +#define SSP_CID0(r) (r + 0xFF0) +#define SSP_CID1(r) (r + 0xFF4) +#define SSP_CID2(r) (r + 0xFF8) +#define SSP_CID3(r) (r + 0xFFC) + +/* + * SSP Control Register 0 - SSP_CR0 + */ +#define SSP_CR0_MASK_DSS (0x1FUL << 0) +#define SSP_CR0_MASK_HALFDUP (0x1UL << 5) +#define SSP_CR0_MASK_SPO (0x1UL << 6) +#define SSP_CR0_MASK_SPH (0x1UL << 7) +#define SSP_CR0_MASK_SCR (0xFFUL << 8) +#define SSP_CR0_MASK_CSS (0x1FUL << 16) +#define SSP_CR0_MASK_FRF (0x3UL << 21) + +/* + * SSP Control Register 0 - SSP_CR1 + */ +#define SSP_CR1_MASK_LBM (0x1UL << 0) +#define SSP_CR1_MASK_SSE (0x1UL << 1) +#define SSP_CR1_MASK_MS (0x1UL << 2) +#define SSP_CR1_MASK_SOD (0x1UL << 3) +#define SSP_CR1_MASK_RENDN (0x1UL << 4) +#define SSP_CR1_MASK_TENDN (0x1UL << 5) +#define SSP_CR1_MASK_MWAIT (0x1UL << 6) +#define SSP_CR1_MASK_RXIFLSEL (0x7UL << 7) +#define SSP_CR1_MASK_TXIFLSEL (0x7UL << 10) + +/* + * SSP Data Register - SSP_DR + */ +#define SSP_DR_MASK_DATA 0xFFFFFFFF + +/* + * SSP Status Register - SSP_SR + */ +#define SSP_SR_MASK_TFE (0x1UL << 0) /* Transmit FIFO empty */ +#define SSP_SR_MASK_TNF (0x1UL << 1) /* Transmit FIFO not full */ +#define SSP_SR_MASK_RNE (0x1UL << 2) /* Receive FIFO not empty */ +#define SSP_SR_MASK_RFF (0x1UL << 3) /* Receive FIFO full */ +#define SSP_SR_MASK_BSY (0x1UL << 4) /* Busy Flag */ + +/* + * SSP Clock Prescale Register - SSP_CPSR + */ +#define SSP_CPSR_MASK_CPSDVSR (0xFFUL << 0) + +/* + * SSP Interrupt Mask Set/Clear Register - SSP_IMSC + */ +#define SSP_IMSC_MASK_RORIM (0x1UL << 0) /* Receive Overrun Interrupt mask */ +#define SSP_IMSC_MASK_RTIM (0x1UL << 1) /* Receive timeout Interrupt mask */ +#define SSP_IMSC_MASK_RXIM (0x1UL << 2) /* Receive FIFO Interrupt mask */ +#define SSP_IMSC_MASK_TXIM (0x1UL << 3) /* Transmit FIFO Interrupt mask */ + +/* + * SSP Raw Interrupt Status Register - SSP_RIS + */ +/* Receive Overrun Raw Interrupt status */ +#define SSP_RIS_MASK_RORRIS (0x1UL << 0) +/* Receive Timeout Raw Interrupt status */ +#define SSP_RIS_MASK_RTRIS (0x1UL << 1) +/* Receive FIFO Raw Interrupt status */ +#define SSP_RIS_MASK_RXRIS (0x1UL << 2) +/* Transmit FIFO Raw Interrupt status */ +#define SSP_RIS_MASK_TXRIS (0x1UL << 3) + +/* + * SSP Masked Interrupt Status Register - SSP_MIS + */ +/* Receive Overrun Masked Interrupt status */ +#define SSP_MIS_MASK_RORMIS (0x1UL << 0) +/* Receive Timeout Masked Interrupt status */ +#define SSP_MIS_MASK_RTMIS (0x1UL << 1) +/* Receive FIFO Masked Interrupt status */ +#define SSP_MIS_MASK_RXMIS (0x1UL << 2) +/* Transmit FIFO Masked Interrupt status */ +#define SSP_MIS_MASK_TXMIS (0x1UL << 3) + +/* + * SSP Interrupt Clear Register - SSP_ICR + */ +/* Receive Overrun Raw Clear Interrupt bit */ +#define SSP_ICR_MASK_RORIC (0x1UL << 0) +/* Receive Timeout Clear Interrupt bit */ +#define SSP_ICR_MASK_RTIC (0x1UL << 1) + +/* + * SSP DMA Control Register - SSP_DMACR + */ +/* Receive DMA Enable bit */ +#define SSP_DMACR_MASK_RXDMAE (0x1UL << 0) +/* Transmit DMA Enable bit */ +#define SSP_DMACR_MASK_TXDMAE (0x1UL << 1) + +/* + * SSP Integration Test control Register - SSP_ITCR + */ +#define SSP_ITCR_MASK_ITEN (0x1UL << 0) +#define SSP_ITCR_MASK_TESTFIFO (0x1UL << 1) + +/* + * SSP Integration Test Input Register - SSP_ITIP + */ +#define ITIP_MASK_SSPRXD (0x1UL << 0) +#define ITIP_MASK_SSPFSSIN (0x1UL << 1) +#define ITIP_MASK_SSPCLKIN (0x1UL << 2) +#define ITIP_MASK_RXDMAC (0x1UL << 3) +#define ITIP_MASK_TXDMAC (0x1UL << 4) +#define ITIP_MASK_SSPTXDIN (0x1UL << 5) + +/* + * SSP Integration Test output Register - SSP_ITOP + */ +#define ITOP_MASK_SSPTXD (0x1UL << 0) +#define ITOP_MASK_SSPFSSOUT (0x1UL << 1) +#define ITOP_MASK_SSPCLKOUT (0x1UL << 2) +#define ITOP_MASK_SSPOEn (0x1UL << 3) +#define ITOP_MASK_SSPCTLOEn (0x1UL << 4) +#define ITOP_MASK_RORINTR (0x1UL << 5) +#define ITOP_MASK_RTINTR (0x1UL << 6) +#define ITOP_MASK_RXINTR (0x1UL << 7) +#define ITOP_MASK_TXINTR (0x1UL << 8) +#define ITOP_MASK_INTR (0x1UL << 9) +#define ITOP_MASK_RXDMABREQ (0x1UL << 10) +#define ITOP_MASK_RXDMASREQ (0x1UL << 11) +#define ITOP_MASK_TXDMABREQ (0x1UL << 12) +#define ITOP_MASK_TXDMASREQ (0x1UL << 13) + +/* + * SSP Test Data Register - SSP_TDR + */ +#define TDR_MASK_TESTDATA (0xFFFFFFFF) + +/* + * Message State + * we use the spi_message.state (void *) pointer to + * hold a single state value, that's why all this + * (void *) casting is done here. + */ +#define STATE_START ((void *) 0) +#define STATE_RUNNING ((void *) 1) +#define STATE_DONE ((void *) 2) +#define STATE_ERROR ((void *) -1) + +/* + * Queue State + */ +#define QUEUE_RUNNING (0) +#define QUEUE_STOPPED (1) +/* + * SSP State - Whether Enabled or Disabled + */ +#define SSP_DISABLED (0) +#define SSP_ENABLED (1) + +/* + * SSP DMA State - Whether DMA Enabled or Disabled + */ +#define SSP_DMA_DISABLED (0) +#define SSP_DMA_ENABLED (1) + +/* + * SSP Clock Defaults + */ +#define NMDK_SSP_DEFAULT_CLKRATE 0x2 +#define NMDK_SSP_DEFAULT_PRESCALE 0x40 + +/* + * SSP Clock Parameter ranges + */ +#define CPSDVR_MIN 0x02 +#define CPSDVR_MAX 0xFE +#define SCR_MIN 0x00 +#define SCR_MAX 0xFF + +/* + * SSP Interrupt related Macros + */ +#define DEFAULT_SSP_REG_IMSC 0x0UL +#define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC +#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC) + +#define CLEAR_ALL_INTERRUPTS 0x3 + + +/* + * The type of reading going on on this chip + */ +enum ssp_reading { + READING_NULL, + READING_U8, + READING_U16, + READING_U32 +}; + +/** + * The type of writing going on on this chip + */ +enum ssp_writing { + WRITING_NULL, + WRITING_U8, + WRITING_U16, + WRITING_U32 +}; + +/** + * struct vendor_data - vendor-specific config parameters + * for PL022 derivates + * @fifodepth: depth of FIFOs (both) + * @max_bpw: maximum number of bits per word + * @unidir: supports unidirection transfers + */ +struct vendor_data { + int fifodepth; + int max_bpw; + bool unidir; +}; + +/** + * struct pl022 - This is the private SSP driver data structure + * @adev: AMBA device model hookup + * @phybase: The physical memory where the SSP device resides + * @virtbase: The virtual memory where the SSP is mapped + * @master: SPI framework hookup + * @master_info: controller-specific data from machine setup + * @regs: SSP controller register's virtual address + * @pump_messages: Work struct for scheduling work to the workqueue + * @lock: spinlock to syncronise access to driver data + * @workqueue: a workqueue on which any spi_message request is queued + * @busy: workqueue is busy + * @run: workqueue is running + * @pump_transfers: Tasklet used in Interrupt Transfer mode + * @cur_msg: Pointer to current spi_message being processed + * @cur_transfer: Pointer to current spi_transfer + * @cur_chip: pointer to current clients chip(assigned from controller_state) + * @tx: current position in TX buffer to be read + * @tx_end: end position in TX buffer to be read + * @rx: current position in RX buffer to be written + * @rx_end: end position in RX buffer to be written + * @readingtype: the type of read currently going on + * @writingtype: the type or write currently going on + */ +struct pl022 { + struct amba_device *adev; + struct vendor_data *vendor; + resource_size_t phybase; + void __iomem *virtbase; + struct clk *clk; + struct spi_master *master; + struct pl022_ssp_controller *master_info; + /* Driver message queue */ + struct workqueue_struct *workqueue; + struct work_struct pump_messages; + spinlock_t queue_lock; + struct list_head queue; + int busy; + int run; + /* Message transfer pump */ + struct tasklet_struct pump_transfers; + struct spi_message *cur_msg; + struct spi_transfer *cur_transfer; + struct chip_data *cur_chip; + void *tx; + void *tx_end; + void *rx; + void *rx_end; + enum ssp_reading read; + enum ssp_writing write; +}; + +/** + * struct chip_data - To maintain runtime state of SSP for each client chip + * @cr0: Value of control register CR0 of SSP + * @cr1: Value of control register CR1 of SSP + * @dmacr: Value of DMA control Register of SSP + * @cpsr: Value of Clock prescale register + * @n_bytes: how many bytes(power of 2) reqd for a given data width of client + * @enable_dma: Whether to enable DMA or not + * @write: function ptr to be used to write when doing xfer for this chip + * @read: function ptr to be used to read when doing xfer for this chip + * @cs_control: chip select callback provided by chip + * @xfer_type: polling/interrupt/DMA + * + * Runtime state of the SSP controller, maintained per chip, + * This would be set according to the current message that would be served + */ +struct chip_data { + u16 cr0; + u16 cr1; + u16 dmacr; + u16 cpsr; + u8 n_bytes; + u8 enable_dma:1; + enum ssp_reading read; + enum ssp_writing write; + void (*cs_control) (u32 command); + int xfer_type; +}; + +/** + * null_cs_control - Dummy chip select function + * @command: select/delect the chip + * + * If no chip select function is provided by client this is used as dummy + * chip select + */ +static void null_cs_control(u32 command) +{ + pr_debug("pl022: dummy chip select control, CS=0x%x\n", command); +} + +/** + * giveback - current spi_message is over, schedule next message and call + * callback of this message. Assumes that caller already + * set message->status; dma and pio irqs are blocked + * @pl022: SSP driver private data structure + */ +static void giveback(struct pl022 *pl022) +{ + struct spi_transfer *last_transfer; + unsigned long flags; + struct spi_message *msg; + void (*curr_cs_control) (u32 command); + + /* + * This local reference to the chip select function + * is needed because we set curr_chip to NULL + * as a step toward termininating the message. + */ + curr_cs_control = pl022->cur_chip->cs_control; + spin_lock_irqsave(&pl022->queue_lock, flags); + msg = pl022->cur_msg; + pl022->cur_msg = NULL; + pl022->cur_transfer = NULL; + pl022->cur_chip = NULL; + queue_work(pl022->workqueue, &pl022->pump_messages); + spin_unlock_irqrestore(&pl022->queue_lock, flags); + + last_transfer = list_entry(msg->transfers.prev, + struct spi_transfer, + transfer_list); + + /* Delay if requested before any change in chip select */ + if (last_transfer->delay_usecs) + /* + * FIXME: This runs in interrupt context. + * Is this really smart? + */ + udelay(last_transfer->delay_usecs); + + /* + * Drop chip select UNLESS cs_change is true or we are returning + * a message with an error, or next message is for another chip + */ + if (!last_transfer->cs_change) + curr_cs_control(SSP_CHIP_DESELECT); + else { + struct spi_message *next_msg; + + /* Holding of cs was hinted, but we need to make sure + * the next message is for the same chip. Don't waste + * time with the following tests unless this was hinted. + * + * We cannot postpone this until pump_messages, because + * after calling msg->complete (below) the driver that + * sent the current message could be unloaded, which + * could invalidate the cs_control() callback... + */ + + /* get a pointer to the next message, if any */ + spin_lock_irqsave(&pl022->queue_lock, flags); + if (list_empty(&pl022->queue)) + next_msg = NULL; + else + next_msg = list_entry(pl022->queue.next, + struct spi_message, queue); + spin_unlock_irqrestore(&pl022->queue_lock, flags); + + /* see if the next and current messages point + * to the same chip + */ + if (next_msg && next_msg->spi != msg->spi) + next_msg = NULL; + if (!next_msg || msg->state == STATE_ERROR) + curr_cs_control(SSP_CHIP_DESELECT); + } + msg->state = NULL; + if (msg->complete) + msg->complete(msg->context); + /* This message is completed, so let's turn off the clock! */ + clk_disable(pl022->clk); +} + +/** + * flush - flush the FIFO to reach a clean state + * @pl022: SSP driver private data structure + */ +static int flush(struct pl022 *pl022) +{ + unsigned long limit = loops_per_jiffy << 1; + + dev_dbg(&pl022->adev->dev, "flush\n"); + do { + while (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE) + readw(SSP_DR(pl022->virtbase)); + } while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_BSY) && limit--); + return limit; +} + +/** + * restore_state - Load configuration of current chip + * @pl022: SSP driver private data structure + */ +static void restore_state(struct pl022 *pl022) +{ + struct chip_data *chip = pl022->cur_chip; + + writew(chip->cr0, SSP_CR0(pl022->virtbase)); + writew(chip->cr1, SSP_CR1(pl022->virtbase)); + writew(chip->dmacr, SSP_DMACR(pl022->virtbase)); + writew(chip->cpsr, SSP_CPSR(pl022->virtbase)); + writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase)); + writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase)); +} + +/** + * load_ssp_default_config - Load default configuration for SSP + * @pl022: SSP driver private data structure + */ + +/* + * Default SSP Register Values + */ +#define DEFAULT_SSP_REG_CR0 ( \ + GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0) | \ + GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \ + GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \ + GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \ + GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \ + GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16) | \ + GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \ +) + +#define DEFAULT_SSP_REG_CR1 ( \ + GEN_MASK_BITS(LOOPBACK_DISABLED, SSP_CR1_MASK_LBM, 0) | \ + GEN_MASK_BITS(SSP_DISABLED, SSP_CR1_MASK_SSE, 1) | \ + GEN_MASK_BITS(SSP_MASTER, SSP_CR1_MASK_MS, 2) | \ + GEN_MASK_BITS(DO_NOT_DRIVE_TX, SSP_CR1_MASK_SOD, 3) | \ + GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN, 4) | \ + GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN, 5) | \ + GEN_MASK_BITS(SSP_MWIRE_WAIT_ZERO, SSP_CR1_MASK_MWAIT, 6) |\ + GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL, 7) | \ + GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL, 10) \ +) + +#define DEFAULT_SSP_REG_CPSR ( \ + GEN_MASK_BITS(NMDK_SSP_DEFAULT_PRESCALE, SSP_CPSR_MASK_CPSDVSR, 0) \ +) + +#define DEFAULT_SSP_REG_DMACR (\ + GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_RXDMAE, 0) | \ + GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_TXDMAE, 1) \ +) + + +static void load_ssp_default_config(struct pl022 *pl022) +{ + writew(DEFAULT_SSP_REG_CR0, SSP_CR0(pl022->virtbase)); + writew(DEFAULT_SSP_REG_CR1, SSP_CR1(pl022->virtbase)); + writew(DEFAULT_SSP_REG_DMACR, SSP_DMACR(pl022->virtbase)); + writew(DEFAULT_SSP_REG_CPSR, SSP_CPSR(pl022->virtbase)); + writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase)); + writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase)); +} + +/** + * This will write to TX and read from RX according to the parameters + * set in pl022. + */ +static void readwriter(struct pl022 *pl022) +{ + + /* + * The FIFO depth is different inbetween primecell variants. + * I believe filling in too much in the FIFO might cause + * errons in 8bit wide transfers on ARM variants (just 8 words + * FIFO, means only 8x8 = 64 bits in FIFO) at least. + * + * FIXME: currently we have no logic to account for this. + * perhaps there is even something broken in HW regarding + * 8bit transfers (it doesn't fail on 16bit) so this needs + * more investigation... + */ + dev_dbg(&pl022->adev->dev, + "%s, rx: %p, rxend: %p, tx: %p, txend: %p\n", + __func__, pl022->rx, pl022->rx_end, pl022->tx, pl022->tx_end); + + /* Read as much as you can */ + while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE) + && (pl022->rx < pl022->rx_end)) { + switch (pl022->read) { + case READING_NULL: + readw(SSP_DR(pl022->virtbase)); + break; + case READING_U8: + *(u8 *) (pl022->rx) = + readw(SSP_DR(pl022->virtbase)) & 0xFFU; + break; + case READING_U16: + *(u16 *) (pl022->rx) = + (u16) readw(SSP_DR(pl022->virtbase)); + break; + case READING_U32: + *(u32 *) (pl022->rx) = + readl(SSP_DR(pl022->virtbase)); + break; + } + pl022->rx += (pl022->cur_chip->n_bytes); + } + /* + * Write as much as you can, while keeping an eye on the RX FIFO! + */ + while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF) + && (pl022->tx < pl022->tx_end)) { + switch (pl022->write) { + case WRITING_NULL: + writew(0x0, SSP_DR(pl022->virtbase)); + break; + case WRITING_U8: + writew(*(u8 *) (pl022->tx), SSP_DR(pl022->virtbase)); + break; + case WRITING_U16: + writew((*(u16 *) (pl022->tx)), SSP_DR(pl022->virtbase)); + break; + case WRITING_U32: + writel(*(u32 *) (pl022->tx), SSP_DR(pl022->virtbase)); + break; + } + pl022->tx += (pl022->cur_chip->n_bytes); + /* + * This inner reader takes care of things appearing in the RX + * FIFO as we're transmitting. This will happen a lot since the + * clock starts running when you put things into the TX FIFO, + * and then things are continously clocked into the RX FIFO. + */ + while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE) + && (pl022->rx < pl022->rx_end)) { + switch (pl022->read) { + case READING_NULL: + readw(SSP_DR(pl022->virtbase)); + break; + case READING_U8: + *(u8 *) (pl022->rx) = + readw(SSP_DR(pl022->virtbase)) & 0xFFU; + break; + case READING_U16: + *(u16 *) (pl022->rx) = + (u16) readw(SSP_DR(pl022->virtbase)); + break; + case READING_U32: + *(u32 *) (pl022->rx) = + readl(SSP_DR(pl022->virtbase)); + break; + } + pl022->rx += (pl022->cur_chip->n_bytes); + } + } + /* + * When we exit here the TX FIFO should be full and the RX FIFO + * should be empty + */ +} + + +/** + * next_transfer - Move to the Next transfer in the current spi message + * @pl022: SSP driver private data structure + * + * This function moves though the linked list of spi transfers in the + * current spi message and returns with the state of current spi + * message i.e whether its last transfer is done(STATE_DONE) or + * Next transfer is ready(STATE_RUNNING) + */ +static void *next_transfer(struct pl022 *pl022) +{ + struct spi_message *msg = pl022->cur_msg; + struct spi_transfer *trans = pl022->cur_transfer; + + /* Move to next transfer */ + if (trans->transfer_list.next != &msg->transfers) { + pl022->cur_transfer = + list_entry(trans->transfer_list.next, + struct spi_transfer, transfer_list); + return STATE_RUNNING; + } + return STATE_DONE; +} +/** + * pl022_interrupt_handler - Interrupt handler for SSP controller + * + * This function handles interrupts generated for an interrupt based transfer. + * If a receive overrun (ROR) interrupt is there then we disable SSP, flag the + * current message's state as STATE_ERROR and schedule the tasklet + * pump_transfers which will do the postprocessing of the current message by + * calling giveback(). Otherwise it reads data from RX FIFO till there is no + * more data, and writes data in TX FIFO till it is not full. If we complete + * the transfer we move to the next transfer and schedule the tasklet. + */ +static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id) +{ + struct pl022 *pl022 = dev_id; + struct spi_message *msg = pl022->cur_msg; + u16 irq_status = 0; + u16 flag = 0; + + if (unlikely(!msg)) { + dev_err(&pl022->adev->dev, + "bad message state in interrupt handler"); + /* Never fail */ + return IRQ_HANDLED; + } + + /* Read the Interrupt Status Register */ + irq_status = readw(SSP_MIS(pl022->virtbase)); + + if (unlikely(!irq_status)) + return IRQ_NONE; + + /* This handles the error code interrupts */ + if (unlikely(irq_status & SSP_MIS_MASK_RORMIS)) { + /* + * Overrun interrupt - bail out since our Data has been + * corrupted + */ + dev_err(&pl022->adev->dev, + "FIFO overrun\n"); + if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF) + dev_err(&pl022->adev->dev, + "RXFIFO is full\n"); + if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF) + dev_err(&pl022->adev->dev, + "TXFIFO is full\n"); + + /* + * Disable and clear interrupts, disable SSP, + * mark message with bad status so it can be + * retried. + */ + writew(DISABLE_ALL_INTERRUPTS, + SSP_IMSC(pl022->virtbase)); + writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase)); + writew((readw(SSP_CR1(pl022->virtbase)) & + (~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase)); + msg->state = STATE_ERROR; + + /* Schedule message queue handler */ + tasklet_schedule(&pl022->pump_transfers); + return IRQ_HANDLED; + } + + readwriter(pl022); + + if ((pl022->tx == pl022->tx_end) && (flag == 0)) { + flag = 1; + /* Disable Transmit interrupt */ + writew(readw(SSP_IMSC(pl022->virtbase)) & + (~SSP_IMSC_MASK_TXIM), + SSP_IMSC(pl022->virtbase)); + } + + /* + * Since all transactions must write as much as shall be read, + * we can conclude the entire transaction once RX is complete. + * At this point, all TX will always be finished. + */ + if (pl022->rx >= pl022->rx_end) { + writew(DISABLE_ALL_INTERRUPTS, + SSP_IMSC(pl022->virtbase)); + writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase)); + if (unlikely(pl022->rx > pl022->rx_end)) { + dev_warn(&pl022->adev->dev, "read %u surplus " + "bytes (did you request an odd " + "number of bytes on a 16bit bus?)\n", + (u32) (pl022->rx - pl022->rx_end)); + } + /* Update total bytes transfered */ + msg->actual_length += pl022->cur_transfer->len; + if (pl022->cur_transfer->cs_change) + pl022->cur_chip-> + cs_control(SSP_CHIP_DESELECT); + /* Move to next transfer */ + msg->state = next_transfer(pl022); + tasklet_schedule(&pl022->pump_transfers); + return IRQ_HANDLED; + } + + return IRQ_HANDLED; +} + +/** + * This sets up the pointers to memory for the next message to + * send out on the SPI bus. + */ +static int set_up_next_transfer(struct pl022 *pl022, + struct spi_transfer *transfer) +{ + int residue; + + /* Sanity check the message for this bus width */ + residue = pl022->cur_transfer->len % pl022->cur_chip->n_bytes; + if (unlikely(residue != 0)) { + dev_err(&pl022->adev->dev, + "message of %u bytes to transmit but the current " + "chip bus has a data width of %u bytes!\n", + pl022->cur_transfer->len, + pl022->cur_chip->n_bytes); + dev_err(&pl022->adev->dev, "skipping this message\n"); + return -EIO; + } + pl022->tx = (void *)transfer->tx_buf; + pl022->tx_end = pl022->tx + pl022->cur_transfer->len; + pl022->rx = (void *)transfer->rx_buf; + pl022->rx_end = pl022->rx + pl022->cur_transfer->len; + pl022->write = + pl022->tx ? pl022->cur_chip->write : WRITING_NULL; + pl022->read = pl022->rx ? pl022->cur_chip->read : READING_NULL; + return 0; +} + +/** + * pump_transfers - Tasklet function which schedules next interrupt transfer + * when running in interrupt transfer mode. + * @data: SSP driver private data structure + * + */ +static void pump_transfers(unsigned long data) +{ + struct pl022 *pl022 = (struct pl022 *) data; + struct spi_message *message = NULL; + struct spi_transfer *transfer = NULL; + struct spi_transfer *previous = NULL; + + /* Get current state information */ + message = pl022->cur_msg; + transfer = pl022->cur_transfer; + + /* Handle for abort */ + if (message->state == STATE_ERROR) { + message->status = -EIO; + giveback(pl022); + return; + } + + /* Handle end of message */ + if (message->state == STATE_DONE) { + message->status = 0; + giveback(pl022); + return; + } + + /* Delay if requested at end of transfer before CS change */ + if (message->state == STATE_RUNNING) { + previous = list_entry(transfer->transfer_list.prev, + struct spi_transfer, + transfer_list); + if (previous->delay_usecs) + /* + * FIXME: This runs in interrupt context. + * Is this really smart? + */ + udelay(previous->delay_usecs); + + /* Drop chip select only if cs_change is requested */ + if (previous->cs_change) + pl022->cur_chip->cs_control(SSP_CHIP_SELECT); + } else { + /* STATE_START */ + message->state = STATE_RUNNING; + } + + if (set_up_next_transfer(pl022, transfer)) { + message->state = STATE_ERROR; + message->status = -EIO; + giveback(pl022); + return; + } + /* Flush the FIFOs and let's go! */ + flush(pl022); + writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase)); +} + +/** + * NOT IMPLEMENTED + * configure_dma - It configures the DMA pipes for DMA transfers + * @data: SSP driver's private data structure + * + */ +static int configure_dma(void *data) +{ + struct pl022 *pl022 = data; + dev_dbg(&pl022->adev->dev, "configure DMA\n"); + return -ENOTSUPP; +} + +/** + * do_dma_transfer - It handles transfers of the current message + * if it is DMA xfer. + * NOT FULLY IMPLEMENTED + * @data: SSP driver's private data structure + */ +static void do_dma_transfer(void *data) +{ + struct pl022 *pl022 = data; + + if (configure_dma(data)) { + dev_dbg(&pl022->adev->dev, "configuration of DMA Failed!\n"); + goto err_config_dma; + } + + /* TODO: Implememt DMA setup of pipes here */ + + /* Enable target chip, set up transfer */ + pl022->cur_chip->cs_control(SSP_CHIP_SELECT); + if (set_up_next_transfer(pl022, pl022->cur_transfer)) { + /* Error path */ + pl022->cur_msg->state = STATE_ERROR; + pl022->cur_msg->status = -EIO; + giveback(pl022); + return; + } + /* Enable SSP */ + writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE), + SSP_CR1(pl022->virtbase)); + + /* TODO: Enable the DMA transfer here */ + return; + + err_config_dma: + pl022->cur_msg->state = STATE_ERROR; + pl022->cur_msg->status = -EIO; + giveback(pl022); + return; +} + +static void do_interrupt_transfer(void *data) +{ + struct pl022 *pl022 = data; + + /* Enable target chip */ + pl022->cur_chip->cs_control(SSP_CHIP_SELECT); + if (set_up_next_transfer(pl022, pl022->cur_transfer)) { + /* Error path */ + pl022->cur_msg->state = STATE_ERROR; + pl022->cur_msg->status = -EIO; + giveback(pl022); + return; + } + /* Enable SSP, turn on interrupts */ + writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE), + SSP_CR1(pl022->virtbase)); + writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase)); +} + +static void do_polling_transfer(void *data) +{ + struct pl022 *pl022 = data; + struct spi_message *message = NULL; + struct spi_transfer *transfer = NULL; + struct spi_transfer *previous = NULL; + struct chip_data *chip; + + chip = pl022->cur_chip; + message = pl022->cur_msg; + + while (message->state != STATE_DONE) { + /* Handle for abort */ + if (message->state == STATE_ERROR) + break; + transfer = pl022->cur_transfer; + + /* Delay if requested at end of transfer */ + if (message->state == STATE_RUNNING) { + previous = + list_entry(transfer->transfer_list.prev, + struct spi_transfer, transfer_list); + if (previous->delay_usecs) + udelay(previous->delay_usecs); + if (previous->cs_change) + pl022->cur_chip->cs_control(SSP_CHIP_SELECT); + } else { + /* STATE_START */ + message->state = STATE_RUNNING; + pl022->cur_chip->cs_control(SSP_CHIP_SELECT); + } + + /* Configuration Changing Per Transfer */ + if (set_up_next_transfer(pl022, transfer)) { + /* Error path */ + message->state = STATE_ERROR; + break; + } + /* Flush FIFOs and enable SSP */ + flush(pl022); + writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE), + SSP_CR1(pl022->virtbase)); + + dev_dbg(&pl022->adev->dev, "POLLING TRANSFER ONGOING ... \n"); + /* FIXME: insert a timeout so we don't hang here indefinately */ + while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end) + readwriter(pl022); + + /* Update total byte transfered */ + message->actual_length += pl022->cur_transfer->len; + if (pl022->cur_transfer->cs_change) + pl022->cur_chip->cs_control(SSP_CHIP_DESELECT); + /* Move to next transfer */ + message->state = next_transfer(pl022); + } + + /* Handle end of message */ + if (message->state == STATE_DONE) + message->status = 0; + else + message->status = -EIO; + + giveback(pl022); + return; +} + +/** + * pump_messages - Workqueue function which processes spi message queue + * @data: pointer to private data of SSP driver + * + * This function checks if there is any spi message in the queue that + * needs processing and delegate control to appropriate function + * do_polling_transfer()/do_interrupt_transfer()/do_dma_transfer() + * based on the kind of the transfer + * + */ +static void pump_messages(struct work_struct *work) +{ + struct pl022 *pl022 = + container_of(work, struct pl022, pump_messages); + unsigned long flags; + + /* Lock queue and check for queue work */ + spin_lock_irqsave(&pl022->queue_lock, flags); + if (list_empty(&pl022->queue) || pl022->run == QUEUE_STOPPED) { + pl022->busy = 0; + spin_unlock_irqrestore(&pl022->queue_lock, flags); + return; + } + /* Make sure we are not already running a message */ + if (pl022->cur_msg) { + spin_unlock_irqrestore(&pl022->queue_lock, flags); + return; + } + /* Extract head of queue */ + pl022->cur_msg = + list_entry(pl022->queue.next, struct spi_message, queue); + + list_del_init(&pl022->cur_msg->queue); + pl022->busy = 1; + spin_unlock_irqrestore(&pl022->queue_lock, flags); + + /* Initial message state */ + pl022->cur_msg->state = STATE_START; + pl022->cur_transfer = list_entry(pl022->cur_msg->transfers.next, + struct spi_transfer, + transfer_list); + + /* Setup the SPI using the per chip configuration */ + pl022->cur_chip = spi_get_ctldata(pl022->cur_msg->spi); + /* + * We enable the clock here, then the clock will be disabled when + * giveback() is called in each method (poll/interrupt/DMA) + */ + clk_enable(pl022->clk); + restore_state(pl022); + flush(pl022); + + if (pl022->cur_chip->xfer_type == POLLING_TRANSFER) + do_polling_transfer(pl022); + else if (pl022->cur_chip->xfer_type == INTERRUPT_TRANSFER) + do_interrupt_transfer(pl022); + else + do_dma_transfer(pl022); +} + + +static int __init init_queue(struct pl022 *pl022) +{ + INIT_LIST_HEAD(&pl022->queue); + spin_lock_init(&pl022->queue_lock); + + pl022->run = QUEUE_STOPPED; + pl022->busy = 0; + + tasklet_init(&pl022->pump_transfers, + pump_transfers, (unsigned long)pl022); + + INIT_WORK(&pl022->pump_messages, pump_messages); + pl022->workqueue = create_singlethread_workqueue( + dev_name(pl022->master->dev.parent)); + if (pl022->workqueue == NULL) + return -EBUSY; + + return 0; +} + + +static int start_queue(struct pl022 *pl022) +{ + unsigned long flags; + + spin_lock_irqsave(&pl022->queue_lock, flags); + + if (pl022->run == QUEUE_RUNNING || pl022->busy) { + spin_unlock_irqrestore(&pl022->queue_lock, flags); + return -EBUSY; + } + + pl022->run = QUEUE_RUNNING; + pl022->cur_msg = NULL; + pl022->cur_transfer = NULL; + pl022->cur_chip = NULL; + spin_unlock_irqrestore(&pl022->queue_lock, flags); + + queue_work(pl022->workqueue, &pl022->pump_messages); + + return 0; +} + + +static int stop_queue(struct pl022 *pl022) +{ + unsigned long flags; + unsigned limit = 500; + int status = 0; + + spin_lock_irqsave(&pl022->queue_lock, flags); + + /* This is a bit lame, but is optimized for the common execution path. + * A wait_queue on the pl022->busy could be used, but then the common + * execution path (pump_messages) would be required to call wake_up or + * friends on every SPI message. Do this instead */ + pl022->run = QUEUE_STOPPED; + while (!list_empty(&pl022->queue) && pl022->busy && limit--) { + spin_unlock_irqrestore(&pl022->queue_lock, flags); + msleep(10); + spin_lock_irqsave(&pl022->queue_lock, flags); + } + + if (!list_empty(&pl022->queue) || pl022->busy) + status = -EBUSY; + + spin_unlock_irqrestore(&pl022->queue_lock, flags); + + return status; +} + +static int destroy_queue(struct pl022 *pl022) +{ + int status; + + status = stop_queue(pl022); + /* we are unloading the module or failing to load (only two calls + * to this routine), and neither call can handle a return value. + * However, destroy_workqueue calls flush_workqueue, and that will + * block until all work is done. If the reason that stop_queue + * timed out is that the work will never finish, then it does no + * good to call destroy_workqueue, so return anyway. */ + if (status != 0) + return status; + + destroy_workqueue(pl022->workqueue); + + return 0; +} + +static int verify_controller_parameters(struct pl022 *pl022, + struct pl022_config_chip *chip_info) +{ + if ((chip_info->lbm != LOOPBACK_ENABLED) + && (chip_info->lbm != LOOPBACK_DISABLED)) { + dev_err(chip_info->dev, + "loopback Mode is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->iface < SSP_INTERFACE_MOTOROLA_SPI) + || (chip_info->iface > SSP_INTERFACE_UNIDIRECTIONAL)) { + dev_err(chip_info->dev, + "interface is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->iface == SSP_INTERFACE_UNIDIRECTIONAL) && + (!pl022->vendor->unidir)) { + dev_err(chip_info->dev, + "unidirectional mode not supported in this " + "hardware version\n"); + return -EINVAL; + } + if ((chip_info->hierarchy != SSP_MASTER) + && (chip_info->hierarchy != SSP_SLAVE)) { + dev_err(chip_info->dev, + "hierarchy is configured incorrectly\n"); + return -EINVAL; + } + if (((chip_info->clk_freq).cpsdvsr < CPSDVR_MIN) + || ((chip_info->clk_freq).cpsdvsr > CPSDVR_MAX)) { + dev_err(chip_info->dev, + "cpsdvsr is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->endian_rx != SSP_RX_MSB) + && (chip_info->endian_rx != SSP_RX_LSB)) { + dev_err(chip_info->dev, + "RX FIFO endianess is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->endian_tx != SSP_TX_MSB) + && (chip_info->endian_tx != SSP_TX_LSB)) { + dev_err(chip_info->dev, + "TX FIFO endianess is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->data_size < SSP_DATA_BITS_4) + || (chip_info->data_size > SSP_DATA_BITS_32)) { + dev_err(chip_info->dev, + "DATA Size is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->com_mode != INTERRUPT_TRANSFER) + && (chip_info->com_mode != DMA_TRANSFER) + && (chip_info->com_mode != POLLING_TRANSFER)) { + dev_err(chip_info->dev, + "Communication mode is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->rx_lev_trig < SSP_RX_1_OR_MORE_ELEM) + || (chip_info->rx_lev_trig > SSP_RX_32_OR_MORE_ELEM)) { + dev_err(chip_info->dev, + "RX FIFO Trigger Level is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->tx_lev_trig < SSP_TX_1_OR_MORE_EMPTY_LOC) + || (chip_info->tx_lev_trig > SSP_TX_32_OR_MORE_EMPTY_LOC)) { + dev_err(chip_info->dev, + "TX FIFO Trigger Level is configured incorrectly\n"); + return -EINVAL; + } + if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) { + if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE) + && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) { + dev_err(chip_info->dev, + "Clock Phase is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->clk_pol != SSP_CLK_POL_IDLE_LOW) + && (chip_info->clk_pol != SSP_CLK_POL_IDLE_HIGH)) { + dev_err(chip_info->dev, + "Clock Polarity is configured incorrectly\n"); + return -EINVAL; + } + } + if (chip_info->iface == SSP_INTERFACE_NATIONAL_MICROWIRE) { + if ((chip_info->ctrl_len < SSP_BITS_4) + || (chip_info->ctrl_len > SSP_BITS_32)) { + dev_err(chip_info->dev, + "CTRL LEN is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->wait_state != SSP_MWIRE_WAIT_ZERO) + && (chip_info->wait_state != SSP_MWIRE_WAIT_ONE)) { + dev_err(chip_info->dev, + "Wait State is configured incorrectly\n"); + return -EINVAL; + } + if ((chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + && (chip_info->duplex != + SSP_MICROWIRE_CHANNEL_HALF_DUPLEX)) { + dev_err(chip_info->dev, + "DUPLEX is configured incorrectly\n"); + return -EINVAL; + } + } + if (chip_info->cs_control == NULL) { + dev_warn(chip_info->dev, + "Chip Select Function is NULL for this chip\n"); + chip_info->cs_control = null_cs_control; + } + return 0; +} + +/** + * pl022_transfer - transfer function registered to SPI master framework + * @spi: spi device which is requesting transfer + * @msg: spi message which is to handled is queued to driver queue + * + * This function is registered to the SPI framework for this SPI master + * controller. It will queue the spi_message in the queue of driver if + * the queue is not stopped and return. + */ +static int pl022_transfer(struct spi_device *spi, struct spi_message *msg) +{ + struct pl022 *pl022 = spi_master_get_devdata(spi->master); + unsigned long flags; + + spin_lock_irqsave(&pl022->queue_lock, flags); + + if (pl022->run == QUEUE_STOPPED) { + spin_unlock_irqrestore(&pl022->queue_lock, flags); + return -ESHUTDOWN; + } + msg->actual_length = 0; + msg->status = -EINPROGRESS; + msg->state = STATE_START; + + list_add_tail(&msg->queue, &pl022->queue); + if (pl022->run == QUEUE_RUNNING && !pl022->busy) + queue_work(pl022->workqueue, &pl022->pump_messages); + + spin_unlock_irqrestore(&pl022->queue_lock, flags); + return 0; +} + +static int calculate_effective_freq(struct pl022 *pl022, + int freq, + struct ssp_clock_params *clk_freq) +{ + /* Lets calculate the frequency parameters */ + u16 cpsdvsr = 2; + u16 scr = 0; + bool freq_found = false; + u32 rate; + u32 max_tclk; + u32 min_tclk; + + rate = clk_get_rate(pl022->clk); + /* cpsdvscr = 2 & scr 0 */ + max_tclk = (rate / (CPSDVR_MIN * (1 + SCR_MIN))); + /* cpsdvsr = 254 & scr = 255 */ + min_tclk = (rate / (CPSDVR_MAX * (1 + SCR_MAX))); + + if ((freq <= max_tclk) && (freq >= min_tclk)) { + while (cpsdvsr <= CPSDVR_MAX && !freq_found) { + while (scr <= SCR_MAX && !freq_found) { + if ((rate / + (cpsdvsr * (1 + scr))) > freq) + scr += 1; + else { + /* + * This bool is made true when + * effective frequency >= + * target frequency is found + */ + freq_found = true; + if ((rate / + (cpsdvsr * (1 + scr))) != freq) { + if (scr == SCR_MIN) { + cpsdvsr -= 2; + scr = SCR_MAX; + } else + scr -= 1; + } + } + } + if (!freq_found) { + cpsdvsr += 2; + scr = SCR_MIN; + } + } + if (cpsdvsr != 0) { + dev_dbg(&pl022->adev->dev, + "SSP Effective Frequency is %u\n", + (rate / (cpsdvsr * (1 + scr)))); + clk_freq->cpsdvsr = (u8) (cpsdvsr & 0xFF); + clk_freq->scr = (u8) (scr & 0xFF); + dev_dbg(&pl022->adev->dev, + "SSP cpsdvsr = %d, scr = %d\n", + clk_freq->cpsdvsr, clk_freq->scr); + } + } else { + dev_err(&pl022->adev->dev, + "controller data is incorrect: out of range frequency"); + return -EINVAL; + } + return 0; +} + +/** + * NOT IMPLEMENTED + * process_dma_info - Processes the DMA info provided by client drivers + * @chip_info: chip info provided by client device + * @chip: Runtime state maintained by the SSP controller for each spi device + * + * This function processes and stores DMA config provided by client driver + * into the runtime state maintained by the SSP controller driver + */ +static int process_dma_info(struct pl022_config_chip *chip_info, + struct chip_data *chip) +{ + dev_err(chip_info->dev, + "cannot process DMA info, DMA not implemented!\n"); + return -ENOTSUPP; +} + +/** + * pl022_setup - setup function registered to SPI master framework + * @spi: spi device which is requesting setup + * + * This function is registered to the SPI framework for this SPI master + * controller. If it is the first time when setup is called by this device, + * this function will initialize the runtime state for this chip and save + * the same in the device structure. Else it will update the runtime info + * with the updated chip info. Nothing is really being written to the + * controller hardware here, that is not done until the actual transfer + * commence. + */ + +/* FIXME: JUST GUESSING the spi->mode bits understood by this driver */ +#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ + | SPI_LSB_FIRST | SPI_LOOP) + +static int pl022_setup(struct spi_device *spi) +{ + struct pl022_config_chip *chip_info; + struct chip_data *chip; + int status = 0; + struct pl022 *pl022 = spi_master_get_devdata(spi->master); + + if (spi->mode & ~MODEBITS) { + dev_dbg(&spi->dev, "unsupported mode bits %x\n", + spi->mode & ~MODEBITS); + return -EINVAL; + } + + if (!spi->max_speed_hz) + return -EINVAL; + + /* Get controller_state if one is supplied */ + chip = spi_get_ctldata(spi); + + if (chip == NULL) { + chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); + if (!chip) { + dev_err(&spi->dev, + "cannot allocate controller state\n"); + return -ENOMEM; + } + dev_dbg(&spi->dev, + "allocated memory for controller's runtime state\n"); + } + + /* Get controller data if one is supplied */ + chip_info = spi->controller_data; + + if (chip_info == NULL) { + /* spi_board_info.controller_data not is supplied */ + dev_dbg(&spi->dev, + "using default controller_data settings\n"); + + chip_info = + kzalloc(sizeof(struct pl022_config_chip), GFP_KERNEL); + + if (!chip_info) { + dev_err(&spi->dev, + "cannot allocate controller data\n"); + status = -ENOMEM; + goto err_first_setup; + } + + dev_dbg(&spi->dev, "allocated memory for controller data\n"); + + /* Pointer back to the SPI device */ + chip_info->dev = &spi->dev; + /* + * Set controller data default values: + * Polling is supported by default + */ + chip_info->lbm = LOOPBACK_DISABLED; + chip_info->com_mode = POLLING_TRANSFER; + chip_info->iface = SSP_INTERFACE_MOTOROLA_SPI; + chip_info->hierarchy = SSP_SLAVE; + chip_info->slave_tx_disable = DO_NOT_DRIVE_TX; + chip_info->endian_tx = SSP_TX_LSB; + chip_info->endian_rx = SSP_RX_LSB; + chip_info->data_size = SSP_DATA_BITS_12; + chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM; + chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC; + chip_info->clk_phase = SSP_CLK_FALLING_EDGE; + chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW; + chip_info->ctrl_len = SSP_BITS_8; + chip_info->wait_state = SSP_MWIRE_WAIT_ZERO; + chip_info->duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX; + chip_info->cs_control = null_cs_control; + } else { + dev_dbg(&spi->dev, + "using user supplied controller_data settings\n"); + } + + /* + * We can override with custom divisors, else we use the board + * frequency setting + */ + if ((0 == chip_info->clk_freq.cpsdvsr) + && (0 == chip_info->clk_freq.scr)) { + status = calculate_effective_freq(pl022, + spi->max_speed_hz, + &chip_info->clk_freq); + if (status < 0) + goto err_config_params; + } else { + if ((chip_info->clk_freq.cpsdvsr % 2) != 0) + chip_info->clk_freq.cpsdvsr = + chip_info->clk_freq.cpsdvsr - 1; + } + status = verify_controller_parameters(pl022, chip_info); + if (status) { + dev_err(&spi->dev, "controller data is incorrect"); + goto err_config_params; + } + /* Now set controller state based on controller data */ + chip->xfer_type = chip_info->com_mode; + chip->cs_control = chip_info->cs_control; + + if (chip_info->data_size <= 8) { + dev_dbg(&spi->dev, "1 <= n <=8 bits per word\n"); + chip->n_bytes = 1; + chip->read = READING_U8; + chip->write = WRITING_U8; + } else if (chip_info->data_size <= 16) { + dev_dbg(&spi->dev, "9 <= n <= 16 bits per word\n"); + chip->n_bytes = 2; + chip->read = READING_U16; + chip->write = WRITING_U16; + } else { + if (pl022->vendor->max_bpw >= 32) { + dev_dbg(&spi->dev, "17 <= n <= 32 bits per word\n"); + chip->n_bytes = 4; + chip->read = READING_U32; + chip->write = WRITING_U32; + } else { + dev_err(&spi->dev, + "illegal data size for this controller!\n"); + dev_err(&spi->dev, + "a standard pl022 can only handle " + "1 <= n <= 16 bit words\n"); + goto err_config_params; + } + } + + /* Now Initialize all register settings required for this chip */ + chip->cr0 = 0; + chip->cr1 = 0; + chip->dmacr = 0; + chip->cpsr = 0; + if ((chip_info->com_mode == DMA_TRANSFER) + && ((pl022->master_info)->enable_dma)) { + chip->enable_dma = 1; + dev_dbg(&spi->dev, "DMA mode set in controller state\n"); + status = process_dma_info(chip_info, chip); + if (status < 0) + goto err_config_params; + SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED, + SSP_DMACR_MASK_RXDMAE, 0); + SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED, + SSP_DMACR_MASK_TXDMAE, 1); + } else { + chip->enable_dma = 0; + dev_dbg(&spi->dev, "DMA mode NOT set in controller state\n"); + SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED, + SSP_DMACR_MASK_RXDMAE, 0); + SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED, + SSP_DMACR_MASK_TXDMAE, 1); + } + + chip->cpsr = chip_info->clk_freq.cpsdvsr; + + SSP_WRITE_BITS(chip->cr0, chip_info->data_size, SSP_CR0_MASK_DSS, 0); + SSP_WRITE_BITS(chip->cr0, chip_info->duplex, SSP_CR0_MASK_HALFDUP, 5); + SSP_WRITE_BITS(chip->cr0, chip_info->clk_pol, SSP_CR0_MASK_SPO, 6); + SSP_WRITE_BITS(chip->cr0, chip_info->clk_phase, SSP_CR0_MASK_SPH, 7); + SSP_WRITE_BITS(chip->cr0, chip_info->clk_freq.scr, SSP_CR0_MASK_SCR, 8); + SSP_WRITE_BITS(chip->cr0, chip_info->ctrl_len, SSP_CR0_MASK_CSS, 16); + SSP_WRITE_BITS(chip->cr0, chip_info->iface, SSP_CR0_MASK_FRF, 21); + SSP_WRITE_BITS(chip->cr1, chip_info->lbm, SSP_CR1_MASK_LBM, 0); + SSP_WRITE_BITS(chip->cr1, SSP_DISABLED, SSP_CR1_MASK_SSE, 1); + SSP_WRITE_BITS(chip->cr1, chip_info->hierarchy, SSP_CR1_MASK_MS, 2); + SSP_WRITE_BITS(chip->cr1, chip_info->slave_tx_disable, SSP_CR1_MASK_SOD, 3); + SSP_WRITE_BITS(chip->cr1, chip_info->endian_rx, SSP_CR1_MASK_RENDN, 4); + SSP_WRITE_BITS(chip->cr1, chip_info->endian_tx, SSP_CR1_MASK_TENDN, 5); + SSP_WRITE_BITS(chip->cr1, chip_info->wait_state, SSP_CR1_MASK_MWAIT, 6); + SSP_WRITE_BITS(chip->cr1, chip_info->rx_lev_trig, SSP_CR1_MASK_RXIFLSEL, 7); + SSP_WRITE_BITS(chip->cr1, chip_info->tx_lev_trig, SSP_CR1_MASK_TXIFLSEL, 10); + + /* Save controller_state */ + spi_set_ctldata(spi, chip); + return status; + err_config_params: + err_first_setup: + kfree(chip); + return status; +} + +/** + * pl022_cleanup - cleanup function registered to SPI master framework + * @spi: spi device which is requesting cleanup + * + * This function is registered to the SPI framework for this SPI master + * controller. It will free the runtime state of chip. + */ +static void pl022_cleanup(struct spi_device *spi) +{ + struct chip_data *chip = spi_get_ctldata(spi); + + spi_set_ctldata(spi, NULL); + kfree(chip); +} + + +static int __init +pl022_probe(struct amba_device *adev, struct amba_id *id) +{ + struct device *dev = &adev->dev; + struct pl022_ssp_controller *platform_info = adev->dev.platform_data; + struct spi_master *master; + struct pl022 *pl022 = NULL; /*Data for this driver */ + int status = 0; + + dev_info(&adev->dev, + "ARM PL022 driver, device ID: 0x%08x\n", adev->periphid); + if (platform_info == NULL) { + dev_err(&adev->dev, "probe - no platform data supplied\n"); + status = -ENODEV; + goto err_no_pdata; + } + + /* Allocate master with space for data */ + master = spi_alloc_master(dev, sizeof(struct pl022)); + if (master == NULL) { + dev_err(&adev->dev, "probe - cannot alloc SPI master\n"); + status = -ENOMEM; + goto err_no_master; + } + + pl022 = spi_master_get_devdata(master); + pl022->master = master; + pl022->master_info = platform_info; + pl022->adev = adev; + pl022->vendor = id->data; + + /* + * Bus Number Which has been Assigned to this SSP controller + * on this board + */ + master->bus_num = platform_info->bus_id; + master->num_chipselect = platform_info->num_chipselect; + master->cleanup = pl022_cleanup; + master->setup = pl022_setup; + master->transfer = pl022_transfer; + + dev_dbg(&adev->dev, "BUSNO: %d\n", master->bus_num); + + status = amba_request_regions(adev, NULL); + if (status) + goto err_no_ioregion; + + pl022->virtbase = ioremap(adev->res.start, resource_size(&adev->res)); + if (pl022->virtbase == NULL) { + status = -ENOMEM; + goto err_no_ioremap; + } + printk(KERN_INFO "pl022: mapped registers from 0x%08x to %p\n", + adev->res.start, pl022->virtbase); + + pl022->clk = clk_get(&adev->dev, NULL); + if (IS_ERR(pl022->clk)) { + status = PTR_ERR(pl022->clk); + dev_err(&adev->dev, "could not retrieve SSP/SPI bus clock\n"); + goto err_no_clk; + } + + /* Disable SSP */ + clk_enable(pl022->clk); + writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)), + SSP_CR1(pl022->virtbase)); + load_ssp_default_config(pl022); + clk_disable(pl022->clk); + + status = request_irq(adev->irq[0], pl022_interrupt_handler, 0, "pl022", + pl022); + if (status < 0) { + dev_err(&adev->dev, "probe - cannot get IRQ (%d)\n", status); + goto err_no_irq; + } + /* Initialize and start queue */ + status = init_queue(pl022); + if (status != 0) { + dev_err(&adev->dev, "probe - problem initializing queue\n"); + goto err_init_queue; + } + status = start_queue(pl022); + if (status != 0) { + dev_err(&adev->dev, "probe - problem starting queue\n"); + goto err_start_queue; + } + /* Register with the SPI framework */ + amba_set_drvdata(adev, pl022); + status = spi_register_master(master); + if (status != 0) { + dev_err(&adev->dev, + "probe - problem registering spi master\n"); + goto err_spi_register; + } + dev_dbg(dev, "probe succeded\n"); + return 0; + + err_spi_register: + err_start_queue: + err_init_queue: + destroy_queue(pl022); + free_irq(adev->irq[0], pl022); + err_no_irq: + clk_put(pl022->clk); + err_no_clk: + iounmap(pl022->virtbase); + err_no_ioremap: + amba_release_regions(adev); + err_no_ioregion: + spi_master_put(master); + err_no_master: + err_no_pdata: + return status; +} + +static int __exit +pl022_remove(struct amba_device *adev) +{ + struct pl022 *pl022 = amba_get_drvdata(adev); + int status = 0; + if (!pl022) + return 0; + + /* Remove the queue */ + status = destroy_queue(pl022); + if (status != 0) { + dev_err(&adev->dev, + "queue remove failed (%d)\n", status); + return status; + } + load_ssp_default_config(pl022); + free_irq(adev->irq[0], pl022); + clk_disable(pl022->clk); + clk_put(pl022->clk); + iounmap(pl022->virtbase); + amba_release_regions(adev); + tasklet_disable(&pl022->pump_transfers); + spi_unregister_master(pl022->master); + spi_master_put(pl022->master); + amba_set_drvdata(adev, NULL); + dev_dbg(&adev->dev, "remove succeded\n"); + return 0; +} + +#ifdef CONFIG_PM +static int pl022_suspend(struct amba_device *adev, pm_message_t state) +{ + struct pl022 *pl022 = amba_get_drvdata(adev); + int status = 0; + + status = stop_queue(pl022); + if (status) { + dev_warn(&adev->dev, "suspend cannot stop queue\n"); + return status; + } + + clk_enable(pl022->clk); + load_ssp_default_config(pl022); + clk_disable(pl022->clk); + dev_dbg(&adev->dev, "suspended\n"); + return 0; +} + +static int pl022_resume(struct amba_device *adev) +{ + struct pl022 *pl022 = amba_get_drvdata(adev); + int status = 0; + + /* Start the queue running */ + status = start_queue(pl022); + if (status) + dev_err(&adev->dev, "problem starting queue (%d)\n", status); + else + dev_dbg(&adev->dev, "resumed\n"); + + return status; +} +#else +#define pl022_suspend NULL +#define pl022_resume NULL +#endif /* CONFIG_PM */ + +static struct vendor_data vendor_arm = { + .fifodepth = 8, + .max_bpw = 16, + .unidir = false, +}; + + +static struct vendor_data vendor_st = { + .fifodepth = 32, + .max_bpw = 32, + .unidir = false, +}; + +static struct amba_id pl022_ids[] = { + { + /* + * ARM PL022 variant, this has a 16bit wide + * and 8 locations deep TX/RX FIFO + */ + .id = 0x00041022, + .mask = 0x000fffff, + .data = &vendor_arm, + }, + { + /* + * ST Micro derivative, this has 32bit wide + * and 32 locations deep TX/RX FIFO + */ + .id = 0x00108022, + .mask = 0xffffffff, + .data = &vendor_st, + }, + { 0, 0 }, +}; + +static struct amba_driver pl022_driver = { + .drv = { + .name = "ssp-pl022", + }, + .id_table = pl022_ids, + .probe = pl022_probe, + .remove = __exit_p(pl022_remove), + .suspend = pl022_suspend, + .resume = pl022_resume, +}; + + +static int __init pl022_init(void) +{ + return amba_driver_register(&pl022_driver); +} + +module_init(pl022_init); + +static void __exit pl022_exit(void) +{ + amba_driver_unregister(&pl022_driver); +} + +module_exit(pl022_exit); + +MODULE_AUTHOR("Linus Walleij "); +MODULE_DESCRIPTION("PL022 SSP Controller Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h new file mode 100644 index 00000000000..dcad0ffd175 --- /dev/null +++ b/include/linux/amba/pl022.h @@ -0,0 +1,264 @@ +/* + * include/linux/amba/pl022.h + * + * Copyright (C) 2008-2009 ST-Ericsson AB + * Copyright (C) 2006 STMicroelectronics Pvt. Ltd. + * + * Author: Linus Walleij + * + * Initial version inspired by: + * linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c + * Initial adoption to PL022 by: + * Sachin Verma + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _SSP_PL022_H +#define _SSP_PL022_H + +#include + +/** + * whether SSP is in loopback mode or not + */ +enum ssp_loopback { + LOOPBACK_DISABLED, + LOOPBACK_ENABLED +}; + +/** + * enum ssp_interface - interfaces allowed for this SSP Controller + * @SSP_INTERFACE_MOTOROLA_SPI: Motorola Interface + * @SSP_INTERFACE_TI_SYNC_SERIAL: Texas Instrument Synchronous Serial + * interface + * @SSP_INTERFACE_NATIONAL_MICROWIRE: National Semiconductor Microwire + * interface + * @SSP_INTERFACE_UNIDIRECTIONAL: Unidirectional interface (STn8810 + * &STn8815 only) + */ +enum ssp_interface { + SSP_INTERFACE_MOTOROLA_SPI, + SSP_INTERFACE_TI_SYNC_SERIAL, + SSP_INTERFACE_NATIONAL_MICROWIRE, + SSP_INTERFACE_UNIDIRECTIONAL +}; + +/** + * enum ssp_hierarchy - whether SSP is configured as Master or Slave + */ +enum ssp_hierarchy { + SSP_MASTER, + SSP_SLAVE +}; + +/** + * enum ssp_clock_params - clock parameters, to set SSP clock at a + * desired freq + */ +struct ssp_clock_params { + u8 cpsdvsr; /* value from 2 to 254 (even only!) */ + u8 scr; /* value from 0 to 255 */ +}; + +/** + * enum ssp_rx_endian - endianess of Rx FIFO Data + */ +enum ssp_rx_endian { + SSP_RX_MSB, + SSP_RX_LSB +}; + +/** + * enum ssp_tx_endian - endianess of Tx FIFO Data + */ +enum ssp_tx_endian { + SSP_TX_MSB, + SSP_TX_LSB +}; + +/** + * enum ssp_data_size - number of bits in one data element + */ +enum ssp_data_size { + SSP_DATA_BITS_4 = 0x03, SSP_DATA_BITS_5, SSP_DATA_BITS_6, + SSP_DATA_BITS_7, SSP_DATA_BITS_8, SSP_DATA_BITS_9, + SSP_DATA_BITS_10, SSP_DATA_BITS_11, SSP_DATA_BITS_12, + SSP_DATA_BITS_13, SSP_DATA_BITS_14, SSP_DATA_BITS_15, + SSP_DATA_BITS_16, SSP_DATA_BITS_17, SSP_DATA_BITS_18, + SSP_DATA_BITS_19, SSP_DATA_BITS_20, SSP_DATA_BITS_21, + SSP_DATA_BITS_22, SSP_DATA_BITS_23, SSP_DATA_BITS_24, + SSP_DATA_BITS_25, SSP_DATA_BITS_26, SSP_DATA_BITS_27, + SSP_DATA_BITS_28, SSP_DATA_BITS_29, SSP_DATA_BITS_30, + SSP_DATA_BITS_31, SSP_DATA_BITS_32 +}; + +/** + * enum ssp_mode - SSP mode of operation (Communication modes) + */ +enum ssp_mode { + INTERRUPT_TRANSFER, + POLLING_TRANSFER, + DMA_TRANSFER +}; + +/** + * enum ssp_rx_level_trig - receive FIFO watermark level which triggers + * IT: Interrupt fires when _N_ or more elements in RX FIFO. + */ +enum ssp_rx_level_trig { + SSP_RX_1_OR_MORE_ELEM, + SSP_RX_4_OR_MORE_ELEM, + SSP_RX_8_OR_MORE_ELEM, + SSP_RX_16_OR_MORE_ELEM, + SSP_RX_32_OR_MORE_ELEM +}; + +/** + * Transmit FIFO watermark level which triggers (IT Interrupt fires + * when _N_ or more empty locations in TX FIFO) + */ +enum ssp_tx_level_trig { + SSP_TX_1_OR_MORE_EMPTY_LOC, + SSP_TX_4_OR_MORE_EMPTY_LOC, + SSP_TX_8_OR_MORE_EMPTY_LOC, + SSP_TX_16_OR_MORE_EMPTY_LOC, + SSP_TX_32_OR_MORE_EMPTY_LOC +}; + +/** + * enum SPI Clock Phase - clock phase (Motorola SPI interface only) + * @SSP_CLK_RISING_EDGE: Receive data on rising edge + * @SSP_CLK_FALLING_EDGE: Receive data on falling edge + */ +enum ssp_spi_clk_phase { + SSP_CLK_RISING_EDGE, + SSP_CLK_FALLING_EDGE +}; + +/** + * enum SPI Clock Polarity - clock polarity (Motorola SPI interface only) + * @SSP_CLK_POL_IDLE_LOW: Low inactive level + * @SSP_CLK_POL_IDLE_HIGH: High inactive level + */ +enum ssp_spi_clk_pol { + SSP_CLK_POL_IDLE_LOW, + SSP_CLK_POL_IDLE_HIGH +}; + +/** + * Microwire Conrol Lengths Command size in microwire format + */ +enum ssp_microwire_ctrl_len { + SSP_BITS_4 = 0x03, SSP_BITS_5, SSP_BITS_6, + SSP_BITS_7, SSP_BITS_8, SSP_BITS_9, + SSP_BITS_10, SSP_BITS_11, SSP_BITS_12, + SSP_BITS_13, SSP_BITS_14, SSP_BITS_15, + SSP_BITS_16, SSP_BITS_17, SSP_BITS_18, + SSP_BITS_19, SSP_BITS_20, SSP_BITS_21, + SSP_BITS_22, SSP_BITS_23, SSP_BITS_24, + SSP_BITS_25, SSP_BITS_26, SSP_BITS_27, + SSP_BITS_28, SSP_BITS_29, SSP_BITS_30, + SSP_BITS_31, SSP_BITS_32 +}; + +/** + * enum Microwire Wait State + * @SSP_MWIRE_WAIT_ZERO: No wait state inserted after last command bit + * @SSP_MWIRE_WAIT_ONE: One wait state inserted after last command bit + */ +enum ssp_microwire_wait_state { + SSP_MWIRE_WAIT_ZERO, + SSP_MWIRE_WAIT_ONE +}; + +/** + * enum Microwire - whether Full/Half Duplex + * @SSP_MICROWIRE_CHANNEL_FULL_DUPLEX: SSPTXD becomes bi-directional, + * SSPRXD not used + * @SSP_MICROWIRE_CHANNEL_HALF_DUPLEX: SSPTXD is an output, SSPRXD is + * an input. + */ +enum ssp_duplex { + SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, + SSP_MICROWIRE_CHANNEL_HALF_DUPLEX +}; + +/** + * CHIP select/deselect commands + */ +enum ssp_chip_select { + SSP_CHIP_SELECT, + SSP_CHIP_DESELECT +}; + + +/** + * struct pl022_ssp_master - device.platform_data for SPI controller devices. + * @num_chipselect: chipselects are used to distinguish individual + * SPI slaves, and are numbered from zero to num_chipselects - 1. + * each slave has a chipselect signal, but it's common that not + * every chipselect is connected to a slave. + * @enable_dma: if true enables DMA driven transfers. + */ +struct pl022_ssp_controller { + u16 bus_id; + u8 num_chipselect; + u8 enable_dma:1; +}; + +/** + * struct ssp_config_chip - spi_board_info.controller_data for SPI + * slave devices, copied to spi_device.controller_data. + * + * @lbm: used for test purpose to internally connect RX and TX + * @iface: Interface type(Motorola, TI, Microwire, Universal) + * @hierarchy: sets whether interface is master or slave + * @slave_tx_disable: SSPTXD is disconnected (in slave mode only) + * @clk_freq: Tune freq parameters of SSP(when in master mode) + * @endian_rx: Endianess of Data in Rx FIFO + * @endian_tx: Endianess of Data in Tx FIFO + * @data_size: Width of data element(4 to 32 bits) + * @com_mode: communication mode: polling, Interrupt or DMA + * @rx_lev_trig: Rx FIFO watermark level (for IT & DMA mode) + * @tx_lev_trig: Tx FIFO watermark level (for IT & DMA mode) + * @clk_phase: Motorola SPI interface Clock phase + * @clk_pol: Motorola SPI interface Clock polarity + * @ctrl_len: Microwire interface: Control length + * @wait_state: Microwire interface: Wait state + * @duplex: Microwire interface: Full/Half duplex + * @cs_control: function pointer to board-specific function to + * assert/deassert I/O port to control HW generation of devices chip-select. + * @dma_xfer_type: Type of DMA xfer (Mem-to-periph or Periph-to-Periph) + * @dma_config: DMA configuration for SSP controller and peripheral + */ +struct pl022_config_chip { + struct device *dev; + enum ssp_loopback lbm; + enum ssp_interface iface; + enum ssp_hierarchy hierarchy; + bool slave_tx_disable; + struct ssp_clock_params clk_freq; + enum ssp_rx_endian endian_rx; + enum ssp_tx_endian endian_tx; + enum ssp_data_size data_size; + enum ssp_mode com_mode; + enum ssp_rx_level_trig rx_lev_trig; + enum ssp_tx_level_trig tx_lev_trig; + enum ssp_spi_clk_phase clk_phase; + enum ssp_spi_clk_pol clk_pol; + enum ssp_microwire_ctrl_len ctrl_len; + enum ssp_microwire_wait_state wait_state; + enum ssp_duplex duplex; + void (*cs_control) (u32 control); +}; + +#endif /* _SSP_PL022_H */ -- cgit v1.2.3-70-g09d2 From df1a132bf3d3508f863336c80a27806a2ac947e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:02:22 +0200 Subject: perf_counter: Introduce struct for sample data For easy extension of the sample data, put it in a structure. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 10 +++++++--- arch/x86/kernel/cpu/perf_counter.c | 15 +++++++++++---- include/linux/perf_counter.h | 10 ++++++++-- kernel/perf_counter.c | 38 ++++++++++++++++++++++---------------- 4 files changed, 48 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4786ad9a288..5e0bf399c43 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1001,7 +1001,11 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) { - addr = 0; + struct perf_sample_data data = { + .regs = regs, + .addr = 0, + }; + if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { /* * The user wants a data address recorded. @@ -1016,9 +1020,9 @@ static void record_and_restart(struct perf_counter *counter, long val, sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - addr = mfspr(SPRN_SDAR); + data.addr = mfspr(SPRN_SDAR); } - if (perf_counter_overflow(counter, nmi, regs, addr)) { + if (perf_counter_overflow(counter, nmi, &data)) { /* * Interrupts are coming too fast - throttle them * by setting the counter to 0, so it will be diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 240ca563063..82a23d487f9 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1173,11 +1173,14 @@ static void intel_pmu_reset(void) */ static int intel_pmu_handle_irq(struct pt_regs *regs) { + struct perf_sample_data data; struct cpu_hw_counters *cpuc; - struct cpu_hw_counters; int bit, cpu, loops; u64 ack, status; + data.regs = regs; + data.addr = 0; + cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); @@ -1210,7 +1213,7 @@ again: if (!intel_pmu_save_and_restart(counter)) continue; - if (perf_counter_overflow(counter, 1, regs, 0)) + if (perf_counter_overflow(counter, 1, &data)) intel_pmu_disable_counter(&counter->hw, bit); } @@ -1230,12 +1233,16 @@ again: static int amd_pmu_handle_irq(struct pt_regs *regs) { - int cpu, idx, handled = 0; + struct perf_sample_data data; struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; + int cpu, idx, handled = 0; u64 val; + data.regs = regs; + data.addr = 0; + cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); @@ -1256,7 +1263,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) if (!x86_perf_counter_set_period(counter, hwc, idx)) continue; - if (perf_counter_overflow(counter, 1, regs, 0)) + if (perf_counter_overflow(counter, 1, &data)) amd_pmu_disable_counter(hwc, idx); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 282d8cc4898..d8c0eb480f9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -605,8 +605,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_counter_context *ctx, int cpu); extern void perf_counter_update_userpage(struct perf_counter *counter); -extern int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr); +struct perf_sample_data { + struct pt_regs *regs; + u64 addr; +}; + +extern int perf_counter_overflow(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); + /* * Return 1 for a software counter, 0 for a hardware counter */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ae591a1275a..4fe85e804f4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2378,8 +2378,8 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr) +static void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data) { int ret; u64 sample_type = counter->attr.sample_type; @@ -2404,10 +2404,10 @@ static void perf_counter_output(struct perf_counter *counter, header.size = sizeof(header); header.misc = PERF_EVENT_MISC_OVERFLOW; - header.misc |= perf_misc_flags(regs); + header.misc |= perf_misc_flags(data->regs); if (sample_type & PERF_SAMPLE_IP) { - ip = perf_instruction_pointer(regs); + ip = perf_instruction_pointer(data->regs); header.type |= PERF_SAMPLE_IP; header.size += sizeof(ip); } @@ -2460,7 +2460,7 @@ static void perf_counter_output(struct perf_counter *counter, } if (sample_type & PERF_SAMPLE_CALLCHAIN) { - callchain = perf_callchain(regs); + callchain = perf_callchain(data->regs); if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); @@ -2486,7 +2486,7 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_put(&handle, time); if (sample_type & PERF_SAMPLE_ADDR) - perf_output_put(&handle, addr); + perf_output_put(&handle, data->addr); if (sample_type & PERF_SAMPLE_ID) perf_output_put(&handle, counter->id); @@ -2950,8 +2950,8 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) * Generic counter overflow handling. */ -int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr) +int perf_counter_overflow(struct perf_counter *counter, int nmi, + struct perf_sample_data *data) { int events = atomic_read(&counter->event_limit); int throttle = counter->pmu->unthrottle != NULL; @@ -3005,7 +3005,7 @@ int perf_counter_overflow(struct perf_counter *counter, perf_counter_disable(counter); } - perf_counter_output(counter, nmi, regs, addr); + perf_counter_output(counter, nmi, data); return ret; } @@ -3054,24 +3054,25 @@ static void perf_swcounter_set_period(struct perf_counter *counter) static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) { enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; struct perf_counter *counter; - struct pt_regs *regs; u64 period; counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); counter->pmu->read(counter); - regs = get_irq_regs(); + data.addr = 0; + data.regs = get_irq_regs(); /* * In case we exclude kernel IPs or are somehow not in interrupt * context, provide the next best thing, the user IP. */ - if ((counter->attr.exclude_kernel || !regs) && + if ((counter->attr.exclude_kernel || !data.regs) && !counter->attr.exclude_user) - regs = task_pt_regs(current); + data.regs = task_pt_regs(current); - if (regs) { - if (perf_counter_overflow(counter, 0, regs, 0)) + if (data.regs) { + if (perf_counter_overflow(counter, 0, &data)) ret = HRTIMER_NORESTART; } @@ -3084,9 +3085,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) static void perf_swcounter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { + struct perf_sample_data data = { + .regs = regs, + .addr = addr, + }; + perf_swcounter_update(counter); perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, regs, addr)) + if (perf_counter_overflow(counter, nmi, &data)) /* soft-disable the counter */ ; -- cgit v1.2.3-70-g09d2 From 9e350de37ac9607012fcf9c5314a28fbddf8f43c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:34:59 +0200 Subject: perf_counter: Accurate period data We currently log hw.sample_period for PERF_SAMPLE_PERIOD, however this is incorrect. When we adjust the period, it will only take effect the next cycle but report it for the current cycle. So when we adjust the period for every cycle, we're always wrong. Solve this by keeping track of the last_period. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 9 ++++++--- arch/x86/kernel/cpu/perf_counter.c | 15 ++++++++++++--- include/linux/perf_counter.h | 6 ++++-- kernel/perf_counter.c | 9 ++++++--- 4 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5e0bf399c43..4990ce2e5f0 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -767,6 +767,7 @@ static void power_pmu_unthrottle(struct perf_counter *counter) perf_disable(); power_pmu_read(counter); left = counter->hw.sample_period; + counter->hw.last_period = left; val = 0; if (left < 0x80000000L) val = 0x80000000L - left; @@ -937,7 +938,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw.sample_period); + counter->hw.last_period = counter->hw.sample_period; + atomic64_set(&counter->hw.period_left, counter->hw.last_period); /* * See if we need to reserve the PMU. @@ -1002,8 +1004,9 @@ static void record_and_restart(struct perf_counter *counter, long val, */ if (record) { struct perf_sample_data data = { - .regs = regs, - .addr = 0, + .regs = regs, + .addr = 0, + .period = counter->hw.last_period, }; if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 82a23d487f9..57ae1bec81b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -698,6 +698,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); } @@ -880,12 +881,14 @@ x86_perf_counter_set_period(struct perf_counter *counter, if (unlikely(left <= -period)) { left = period; atomic64_set(&hwc->period_left, left); + hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; atomic64_set(&hwc->period_left, left); + hwc->last_period = period; ret = 1; } /* @@ -1257,9 +1260,12 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) if (val & (1ULL << (x86_pmu.counter_bits - 1))) continue; - /* counter overflow */ - handled = 1; - inc_irq_stat(apic_perf_irqs); + /* + * counter overflow + */ + handled = 1; + data.period = counter->hw.last_period; + if (!x86_perf_counter_set_period(counter, hwc, idx)) continue; @@ -1267,6 +1273,9 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) amd_pmu_disable_counter(hwc, idx); } + if (handled) + inc_irq_stat(apic_perf_irqs); + return handled; } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d8c0eb480f9..5b966472b45 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -366,6 +366,7 @@ struct hw_perf_counter { }; atomic64_t prev_count; u64 sample_period; + u64 last_period; atomic64_t period_left; u64 interrupts; @@ -606,8 +607,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); struct perf_sample_data { - struct pt_regs *regs; - u64 addr; + struct pt_regs *regs; + u64 addr; + u64 period; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4fe85e804f4..8b89b40bd0f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2495,7 +2495,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, perf_output_put(&handle, cpu_entry); if (sample_type & PERF_SAMPLE_PERIOD) - perf_output_put(&handle, counter->hw.sample_period); + perf_output_put(&handle, data->period); /* * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. @@ -3040,11 +3040,13 @@ static void perf_swcounter_set_period(struct perf_counter *counter) if (unlikely(left <= -period)) { left = period; atomic64_set(&hwc->period_left, left); + hwc->last_period = period; } if (unlikely(left <= 0)) { left += period; atomic64_add(period, &hwc->period_left); + hwc->last_period = period; } atomic64_set(&hwc->prev_count, -left); @@ -3086,8 +3088,9 @@ static void perf_swcounter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data = { - .regs = regs, - .addr = addr, + .regs = regs, + .addr = addr, + .period = counter->hw.last_period, }; perf_swcounter_update(counter); -- cgit v1.2.3-70-g09d2 From dcae3626d031fe6296b1e96a16f986193a41f840 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 10 Jun 2009 12:43:48 -0700 Subject: drm: fix LOCK_TEST_WITH_RETURN macro When this macro isn't called with 'file_priv' this will result in a build failure. Signed-off-by: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- include/drm/drmP.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index b84d8ae35e6..efa5f79a35c 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -237,15 +237,15 @@ struct drm_device; * \param dev DRM device. * \param filp file pointer of the caller. */ -#define LOCK_TEST_WITH_RETURN( dev, file_priv ) \ -do { \ - if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock) || \ - file_priv->master->lock.file_priv != file_priv) { \ +#define LOCK_TEST_WITH_RETURN( dev, _file_priv ) \ +do { \ + if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) || \ + _file_priv->master->lock.file_priv != _file_priv) { \ DRM_ERROR( "%s called without lock held, held %d owner %p %p\n",\ - __func__, _DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock),\ - file_priv->master->lock.file_priv, file_priv); \ - return -EINVAL; \ - } \ + __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\ + _file_priv->master->lock.file_priv, _file_priv); \ + return -EINVAL; \ + } \ } while (0) /** -- cgit v1.2.3-70-g09d2 From 4fefcb27050b98c97b1c32bc710fc2f874449dee Mon Sep 17 00:00:00 2001 From: yakui_zhao Date: Tue, 2 Jun 2009 14:09:47 +0800 Subject: drm: add separate drm debugging levels Now all the DRM debug info will be reported if the boot option of "drm.debug=1" is added. Sometimes it is inconvenient to get the debug info in KMS mode. We will get too much unrelated info. This will separate several DRM debug levels and the debug level can be used to print the different debug info. And the debug level is controlled by the module parameter of drm.debug In this patch it is divided into four debug levels; drm_core, drm_driver, drm_kms, drm_mode. At the same time we can get the different debug info by changing the debug level. This can be done by adding the module parameter. Of course it can be changed through the /sys/module/drm/parameters/debug after the system is booted. Four debug macro definitions are provided. DRM_DEBUG(fmt, args...) DRM_DEBUG_DRIVER(prefix, fmt, args...) DRM_DEBUG_KMS(prefix, fmt, args...) DRM_DEBUG_MODE(prefix, fmt, args...) When the boot option of "drm.debug=4" is added, it will print the debug info using DRM_DEBUG_KMS macro definition. When the boot option of "drm.debug=6" is added, it will print the debug info using DRM_DEBUG_KMS/DRM_DEBUG_DRIVER. Sometimes we expect to print the value of an array. For example: SDVO command, In such case the following four DRM debug macro definitions are added: DRM_LOG(fmt, args...) DRM_LOG_DRIVER(fmt, args...) DRM_LOG_KMS(fmt, args...) DRM_LOG_MODE(fmt, args...) Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_stub.c | 17 ++++++++++++- include/drm/drmP.h | 61 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index b9631e3a1ea..89050684fe0 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -51,7 +51,22 @@ struct idr drm_minors_idr; struct class *drm_class; struct proc_dir_entry *drm_proc_root; struct dentry *drm_debugfs_root; - +void drm_ut_debug_printk(unsigned int request_level, + const char *prefix, + const char *function_name, + const char *format, ...) +{ + va_list args; + + if (drm_debug & request_level) { + if (function_name) + printk(KERN_DEBUG "[%s:%s], ", prefix, function_name); + va_start(args, format); + vprintk(format, args); + va_end(args); + } +} +EXPORT_SYMBOL(drm_ut_debug_printk); static int drm_minor_get_id(struct drm_device *dev, int type) { int new_id; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index efa5f79a35c..afc21685230 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -87,6 +87,15 @@ struct drm_device; #include "drm_os_linux.h" #include "drm_hashtab.h" +#define DRM_UT_CORE 0x01 +#define DRM_UT_DRIVER 0x02 +#define DRM_UT_KMS 0x04 +#define DRM_UT_MODE 0x08 + +extern void drm_ut_debug_printk(unsigned int request_level, + const char *prefix, + const char *function_name, + const char *format, ...); /***********************************************************************/ /** \name DRM template customization defaults */ /*@{*/ @@ -186,15 +195,57 @@ struct drm_device; * \param arg arguments */ #if DRM_DEBUG_CODE -#define DRM_DEBUG(fmt, arg...) \ +#define DRM_DEBUG(fmt, args...) \ do { \ - if ( drm_debug ) \ - printk(KERN_DEBUG \ - "[" DRM_NAME ":%s] " fmt , \ - __func__ , ##arg); \ + drm_ut_debug_printk(DRM_UT_CORE, DRM_NAME, \ + __func__, fmt, ##args); \ + } while (0) + +#define DRM_DEBUG_DRIVER(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_DRIVER, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_DEBUG_KMS(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_KMS, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_DEBUG_MODE(prefix, fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_MODE, prefix, \ + __func__, fmt, ##args); \ + } while (0) +#define DRM_LOG(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_CORE, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_KMS(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_KMS, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_MODE(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_MODE, NULL, \ + NULL, fmt, ##args); \ + } while (0) +#define DRM_LOG_DRIVER(fmt, args...) \ + do { \ + drm_ut_debug_printk(DRM_UT_DRIVER, NULL, \ + NULL, fmt, ##args); \ } while (0) #else +#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_KMS(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_MODE(prefix, fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) +#define DRM_LOG(fmt, arg...) do { } while (0) +#define DRM_LOG_KMS(fmt, args...) do { } while (0) +#define DRM_LOG_MODE(fmt, arg...) do { } while (0) +#define DRM_LOG_DRIVER(fmt, arg...) do { } while (0) + #endif #define DRM_PROC_LIMIT (PAGE_SIZE-80) -- cgit v1.2.3-70-g09d2 From 3f68535adad8dd89499505a65fb25d0e02d118cc Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 21 Jan 2009 22:53:22 -0700 Subject: clocksource: sanity check sysfs clocksource changes Thomas, Andrew and Ingo pointed out that we don't have any safety checks in the clocksource sysfs entries to make sure sysadmins don't try to change the clocksource to a non high-res timer capable clocksource (such as jiffies) when high-res timers (HRT) is enabled. Doing so will likely hang a system. Correct this by filtering non HRT clocksources from available_clocksources and not accepting non HRT clocksources with HRT enabled. Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 4 ++-- kernel/time/clocksource.c | 18 +++++++++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0d2f7c8a33d..58021b0c396 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -305,7 +305,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); - +extern int hrtimer_hres_active(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cb8a15c1958..1a70c18cdff 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -476,7 +476,7 @@ static inline int hrtimer_is_hres_enabled(void) /* * Is the high resolution mode active ? */ -static inline int hrtimer_hres_active(void) +int hrtimer_hres_active(void) { return __get_cpu_var(hrtimer_bases).hres_active; } @@ -704,7 +704,7 @@ static int hrtimer_switch_to_hres(void) #else -static inline int hrtimer_hres_active(void) { return 0; } +int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 80189f6f1c5..18b9f5da4ee 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -30,6 +30,7 @@ #include #include /* for spin_unlock_irq() using preempt_count() m68k */ #include +#include void timecounter_init(struct timecounter *tc, const struct cyclecounter *cc, @@ -509,6 +510,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, } } + /* + * Check to make sure we don't switch to a non-HRT usable + * clocksource if HRT is enabled and running + */ + if (hrtimer_hres_active() && + !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { + printk(KERN_WARNING "%s clocksource is not HRT compatible. " + "Cannot switch while in HRT mode\n", ovr->name); + ovr = NULL; + override_name[0] = 0; + } + /* Reselect, when the override name has changed */ if (ovr != clocksource_override) { clocksource_override = ovr; @@ -537,7 +550,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, spin_lock_irq(&clocksource_lock); list_for_each_entry(src, &clocksource_list, list) { - count += snprintf(buf + count, + /* Don't show non-HRES clocksource if HRES is enabled */ + if (!hrtimer_hres_active() || + (src->flags & CLOCK_SOURCE_VALID_FOR_HRES)) + count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } -- cgit v1.2.3-70-g09d2 From d005ba6cc82440d9ebf96f3ec8f79c54578b898f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 10 Jun 2009 05:28:04 +0000 Subject: mdio: Expose 10GBASE-T MDI-X status via ethtool This is available in a standard MDIO register in 10GBASE-T PHYs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/mdio.c | 17 +++++++++++++++++ include/linux/mdio.h | 9 +++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index 66483035f68..dc45e9856c3 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -296,6 +296,23 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, ecmd->duplex = (reg & MDIO_CTRL1_FULLDPLX || ecmd->speed == SPEED_10000); } + + /* 10GBASE-T MDI/MDI-X */ + if (ecmd->port == PORT_TP && ecmd->speed == SPEED_10000) { + switch (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD, + MDIO_PMA_10GBT_SWAPPOL)) { + case MDIO_PMA_10GBT_SWAPPOL_ABNX | MDIO_PMA_10GBT_SWAPPOL_CDNX: + ecmd->eth_tp_mdix = ETH_TP_MDI; + break; + case 0: + ecmd->eth_tp_mdix = ETH_TP_MDI_X; + break; + default: + /* It's complicated... */ + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + break; + } + } } EXPORT_SYMBOL(mdio45_ethtool_gset_npage); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 56851646529..cfdf1df2875 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,6 +45,7 @@ #define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */ /* Media-dependent registers. */ +#define MDIO_PMA_10GBT_SWAPPOL 130 /* 10GBASE-T pair swap & polarity */ #define MDIO_PMA_10GBT_TXPWR 131 /* 10GBASE-T TX power control */ #define MDIO_PMA_10GBT_SNR 133 /* 10GBASE-T SNR margin, lane A. * Lanes B-D are numbered 134-136. */ @@ -195,6 +196,14 @@ #define MDIO_PHYXS_LNSTAT_SYNC3 0x0008 #define MDIO_PHYXS_LNSTAT_ALIGN 0x1000 +/* PMA 10GBASE-T pair swap & polarity */ +#define MDIO_PMA_10GBT_SWAPPOL_ABNX 0x0001 /* Pair A/B uncrossed */ +#define MDIO_PMA_10GBT_SWAPPOL_CDNX 0x0002 /* Pair C/D uncrossed */ +#define MDIO_PMA_10GBT_SWAPPOL_AREV 0x0100 /* Pair A polarity reversed */ +#define MDIO_PMA_10GBT_SWAPPOL_BREV 0x0200 /* Pair B polarity reversed */ +#define MDIO_PMA_10GBT_SWAPPOL_CREV 0x0400 /* Pair C polarity reversed */ +#define MDIO_PMA_10GBT_SWAPPOL_DREV 0x0800 /* Pair D polarity reversed */ + /* PMA 10GBASE-T TX power register. */ #define MDIO_PMA_10GBT_TXPWR_SHORT 0x0001 /* Short-reach mode */ -- cgit v1.2.3-70-g09d2 From 2b85a34e911bf483c27cfdd124aeb1605145dc80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jun 2009 02:55:43 -0700 Subject: net: No more expensive sock_hold()/sock_put() on each tx One of the problem with sock memory accounting is it uses a pair of sock_hold()/sock_put() for each transmitted packet. This slows down bidirectional flows because the receive path also needs to take a refcount on socket and might use a different cpu than transmit path or transmit completion path. So these two atomic operations also trigger cache line bounces. We can see this in tx or tx/rx workloads (media gateways for example), where sock_wfree() can be in top five functions in profiles. We use this sock_hold()/sock_put() so that sock freeing is delayed until all tx packets are completed. As we also update sk_wmem_alloc, we could offset sk_wmem_alloc by one unit at init time, until sk_free() is called. Once sk_free() is called, we atomic_dec_and_test(sk_wmem_alloc) to decrement initial offset and atomicaly check if any packets are in flight. skb_set_owner_w() doesnt call sock_hold() anymore sock_wfree() doesnt call sock_put() anymore, but check if sk_wmem_alloc reached 0 to perform the final freeing. Drawback is that a skb->truesize error could lead to unfreeable sockets, or even worse, prematurely calling __sk_free() on a live socket. Nice speedups on SMP. tbench for example, going from 2691 MB/s to 2711 MB/s on my 8 cpu dev machine, even if tbench was not really hitting sk_refcnt contention point. 5 % speedup on a UDP transmit workload (depends on number of flows), lowering TX completion cpu usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- net/core/sock.c | 29 +++++++++++++++++++++++++---- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - 4 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 4bb1ff9fd15..010e14a93c9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1217,9 +1217,13 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from, static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { - sock_hold(sk); skb->sk = sk; skb->destructor = sock_wfree; + /* + * We used to take a refcount on sk, but following operation + * is enough to guarantee sk_free() wont free this sock until + * all in-flight packets are completed + */ atomic_add(skb->truesize, &sk->sk_wmem_alloc); } diff --git a/net/core/sock.c b/net/core/sock.c index 04e35eb2e73..06e26b77ad9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1008,7 +1008,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -void sk_free(struct sock *sk) +static void __sk_free(struct sock *sk) { struct sk_filter *filter; @@ -1031,6 +1031,17 @@ void sk_free(struct sock *sk) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } + +void sk_free(struct sock *sk) +{ + /* + * We substract one from sk_wmem_alloc and can know if + * some packets are still in some tx queue. + * If not null, sock_wfree() will call __sk_free(sk) later + */ + if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + __sk_free(sk); +} EXPORT_SYMBOL(sk_free); /* @@ -1071,7 +1082,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; atomic_set(&newsk->sk_rmem_alloc, 0); - atomic_set(&newsk->sk_wmem_alloc, 0); + /* + * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) + */ + atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); @@ -1175,12 +1189,18 @@ void __init sk_init(void) void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + int res; /* In case it might be waiting for more memory. */ - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc); if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) sk->sk_write_space(sk); - sock_put(sk); + /* + * if sk_wmem_alloc reached 0, we are last user and should + * free this sock, as sk_free() call could not do it. + */ + if (res == 0) + __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1819,6 +1839,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); atomic_set(&sk->sk_refcnt, 1); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9248d2807ba..24702628266 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -498,7 +498,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) BUG_ON(frag->sk); if (skb->sk) { - sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; truesizes += frag->truesize; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index db6c7224a86..7c76e3d1821 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -680,7 +680,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) BUG_ON(frag->sk); if (skb->sk) { - sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; truesizes += frag->truesize; -- cgit v1.2.3-70-g09d2 From 8593a1967fb9746d318dde88a0a39a36dbfc3445 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 20 May 2009 16:53:30 -0700 Subject: wimax/i2400m: rename misleading I2400M_PL_PAD to I2400M_PL_ALIGN The constant is being use as an alignment factor, not as a padding factor; made reading/reviewing the code quite confusing. Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/fw.c | 2 +- drivers/net/wimax/i2400m/rx.c | 4 ++-- drivers/net/wimax/i2400m/tx.c | 4 ++-- include/linux/wimax/i2400m.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 7ee1b99b384..26924f17f19 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -397,7 +397,7 @@ static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk, unsigned int direct, unsigned int do_csum) { int ret; - size_t chunk_len = ALIGN(__chunk_len, I2400M_PL_PAD); + size_t chunk_len = ALIGN(__chunk_len, I2400M_PL_ALIGN); struct device *dev = i2400m_dev(i2400m); struct { struct i2400m_bootrom_header cmd; diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 7643850a6fb..07c32e68909 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1148,7 +1148,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) num_pls = le16_to_cpu(msg_hdr->num_pls); pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ num_pls * sizeof(msg_hdr->pld[0]); - pl_itr = ALIGN(pl_itr, I2400M_PL_PAD); + pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); if (pl_itr > skb->len) { /* got all the payload descriptors? */ dev_err(dev, "RX: HW BUG? message too short (%u bytes) for " "%u payload descriptors (%zu each, total %zu)\n", @@ -1166,7 +1166,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) single_last = num_pls == 1 || i == num_pls - 1; i2400m_rx_payload(i2400m, skb, single_last, &msg_hdr->pld[i], skb->data + pl_itr); - pl_itr += ALIGN(pl_size, I2400M_PL_PAD); + pl_itr += ALIGN(pl_size, I2400M_PL_ALIGN); cond_resched(); /* Don't monopolize */ } kfree_skb(skb); diff --git a/drivers/net/wimax/i2400m/tx.c b/drivers/net/wimax/i2400m/tx.c index 8ef724d31fb..a635fd720f3 100644 --- a/drivers/net/wimax/i2400m/tx.c +++ b/drivers/net/wimax/i2400m/tx.c @@ -491,7 +491,7 @@ void i2400m_tx_close(struct i2400m *i2400m) */ hdr_size = sizeof(*tx_msg) + le16_to_cpu(tx_msg->num_pls) * sizeof(tx_msg->pld[0]); - hdr_size = ALIGN(hdr_size, I2400M_PL_PAD); + hdr_size = ALIGN(hdr_size, I2400M_PL_ALIGN); tx_msg->offset = I2400M_TX_PLD_SIZE - hdr_size; tx_msg_moved = (void *) tx_msg + tx_msg->offset; memmove(tx_msg_moved, tx_msg, hdr_size); @@ -574,7 +574,7 @@ int i2400m_tx(struct i2400m *i2400m, const void *buf, size_t buf_len, d_fnstart(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u)\n", i2400m, buf, buf_len, pl_type); - padded_len = ALIGN(buf_len, I2400M_PL_PAD); + padded_len = ALIGN(buf_len, I2400M_PL_ALIGN); d_printf(5, dev, "padded_len %zd buf_len %zd\n", padded_len, buf_len); /* If there is no current TX message, create one; if the * current one is out of payload slots or we have a singleton, diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h index d5148a7889a..433693ef2bb 100644 --- a/include/linux/wimax/i2400m.h +++ b/include/linux/wimax/i2400m.h @@ -266,7 +266,7 @@ enum i2400m_ro_type { /* Misc constants */ enum { - I2400M_PL_PAD = 16, /* Payload data size alignment */ + I2400M_PL_ALIGN = 16, /* Payload data size alignment */ I2400M_PL_SIZE_MAX = 0x3EFF, I2400M_MAX_PLS_IN_MSG = 60, /* protocol barkers: sync sequences; for notifications they -- cgit v1.2.3-70-g09d2 From b0fd271d5fba0b2d00888363f3869e3f9b26caa9 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 11 Jun 2009 13:10:16 +0200 Subject: block: add request clone interface (v2) This patch adds the following 2 interfaces for request-stacking drivers: - blk_rq_prep_clone(struct request *clone, struct request *orig, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio*, void *), void *data) * Clones bios in the original request to the clone request (bio_ctr is called for each cloned bios.) * Copies attributes of the original request to the clone request. The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. - blk_rq_unprep_clone(struct request *clone) * Frees cloned bios from the clone request. Request stacking drivers (e.g. request-based dm) need to make a clone request for a submitted request and dispatch it to other devices. To allocate request for the clone, request stacking drivers may not be able to use blk_get_request() because the allocation may be done in an irq-disabled context. So blk_rq_prep_clone() takes a request allocated by the caller as an argument. For each clone bio in the clone request, request stacking drivers should be able to set up their own completion handler. So blk_rq_prep_clone() takes a callback function which is called for each clone bio, and a pointer for private data which is passed to the callback. NOTE: blk_rq_prep_clone() doesn't copy any actual data of the original request. Pages are shared between original bios and cloned bios. So caller must not complete the original request before the clone request. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 5 +++ 2 files changed, 105 insertions(+) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 03c5a64b6cc..02a9252107a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2295,6 +2295,106 @@ int blk_lld_busy(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_lld_busy); +/** + * blk_rq_unprep_clone - Helper function to free all bios in a cloned request + * @rq: the clone request to be cleaned up + * + * Description: + * Free all bios in @rq for a cloned request. + */ +void blk_rq_unprep_clone(struct request *rq) +{ + struct bio *bio; + + while ((bio = rq->bio) != NULL) { + rq->bio = bio->bi_next; + + bio_put(bio); + } +} +EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); + +/* + * Copy attributes of the original request to the clone request. + * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. + */ +static void __blk_rq_prep_clone(struct request *dst, struct request *src) +{ + dst->cpu = src->cpu; + dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); + dst->cmd_type = src->cmd_type; + dst->__sector = blk_rq_pos(src); + dst->__data_len = blk_rq_bytes(src); + dst->nr_phys_segments = src->nr_phys_segments; + dst->ioprio = src->ioprio; + dst->extra_len = src->extra_len; +} + +/** + * blk_rq_prep_clone - Helper function to setup clone request + * @rq: the request to be setup + * @rq_src: original request to be cloned + * @bs: bio_set that bios for clone are allocated from + * @gfp_mask: memory allocation mask for bio + * @bio_ctr: setup function to be called for each clone bio. + * Returns %0 for success, non %0 for failure. + * @data: private data to be passed to @bio_ctr + * + * Description: + * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. + * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) + * are not copied, and copying such parts is the caller's responsibility. + * Also, pages which the original bios are pointing to are not copied + * and the cloned bios just point same pages. + * So cloned bios must be completed before original bios, which means + * the caller must complete @rq before @rq_src. + */ +int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data) +{ + struct bio *bio, *bio_src; + + if (!bs) + bs = fs_bio_set; + + blk_rq_init(NULL, rq); + + __rq_for_each_bio(bio_src, rq_src) { + bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); + if (!bio) + goto free_and_out; + + __bio_clone(bio, bio_src); + + if (bio_integrity(bio_src) && + bio_integrity_clone(bio, bio_src, gfp_mask)) + goto free_and_out; + + if (bio_ctr && bio_ctr(bio, bio_src, data)) + goto free_and_out; + + if (rq->bio) { + rq->biotail->bi_next = bio; + rq->biotail = bio; + } else + rq->bio = rq->biotail = bio; + } + + __blk_rq_prep_clone(rq, rq_src); + + return 0; + +free_and_out: + if (bio) + bio_free(bio, bs); + blk_rq_unprep_clone(rq); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(blk_rq_prep_clone); + int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) { return queue_work(kblockd_workqueue, work); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e740a135e7..ebdfde8fe55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -765,6 +765,11 @@ extern void blk_insert_request(struct request_queue *, struct request *, int, vo extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); +extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data); +extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_plug_device(struct request_queue *); -- cgit v1.2.3-70-g09d2 From 0764771dab80d7b84b9a271bee7f1b21a04a3f0c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 11:18:36 +0200 Subject: perf_counter: More paranoia settings Rename the perf_counter_priv knob to perf_counter_paranoia (because priv can be read as private, as opposed to privileged) and provide one more level: 0 - permissive 1 - restrict cpu counters to privilidged contexts 2 - restrict kernel-mode code counting and profiling Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- kernel/perf_counter.c | 25 +++++++++++++++++++++++-- kernel/sysctl.c | 6 +++--- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5b966472b45..386be915baa 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -648,7 +648,7 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); -extern int sysctl_perf_counter_priv; +extern int sysctl_perf_counter_paranoid; extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_limit; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8b89b40bd0f..63f1987c1c1 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -43,7 +43,23 @@ static atomic_t nr_counters __read_mostly; static atomic_t nr_mmap_counters __read_mostly; static atomic_t nr_comm_counters __read_mostly; -int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ +/* + * 0 - not paranoid + * 1 - disallow cpu counters to unpriv + * 2 - disallow kernel profiling to unpriv + */ +int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */ + +static inline bool perf_paranoid_cpu(void) +{ + return sysctl_perf_counter_paranoid > 0; +} + +static inline bool perf_paranoid_kernel(void) +{ + return sysctl_perf_counter_paranoid > 1; +} + int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */ @@ -1385,7 +1401,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) */ if (cpu != -1) { /* Must be root to operate on a CPU counter: */ - if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN)) + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); if (cpu < 0 || cpu > num_possible_cpus()) @@ -3618,6 +3634,11 @@ SYSCALL_DEFINE5(perf_counter_open, if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) return -EFAULT; + if (!attr.exclude_kernel) { + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + } + /* * Get the target context (task or percpu): */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0c4bf863afa..344a65981de 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -916,9 +916,9 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_PERF_COUNTERS { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_privileged", - .data = &sysctl_perf_counter_priv, - .maxlen = sizeof(sysctl_perf_counter_priv), + .procname = "perf_counter_paranoid", + .data = &sysctl_perf_counter_paranoid, + .maxlen = sizeof(sysctl_perf_counter_paranoid), .mode = 0644, .proc_handler = &proc_dointvec, }, -- cgit v1.2.3-70-g09d2 From df58ab24bf26b166874bfb18b3b5a2e0a8e63179 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 11:25:05 +0200 Subject: perf_counter: Rename perf_counter_limit sysctl Rename perf_counter_limit to perf_counter_max_sample_rate and prohibit creation of counters with a known higher sample frequency. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- kernel/perf_counter.c | 27 +++++++++++++++++++-------- kernel/sysctl.c | 6 +++--- 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 386be915baa..95c797c480e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -650,7 +650,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_limit; +extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 63f1987c1c1..3b2829de559 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -44,11 +44,12 @@ static atomic_t nr_mmap_counters __read_mostly; static atomic_t nr_comm_counters __read_mostly; /* - * 0 - not paranoid - * 1 - disallow cpu counters to unpriv - * 2 - disallow kernel profiling to unpriv + * perf counter paranoia level: + * 0 - not paranoid + * 1 - disallow cpu counters to unpriv + * 2 - disallow kernel profiling to unpriv */ -int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */ +int sysctl_perf_counter_paranoid __read_mostly; static inline bool perf_paranoid_cpu(void) { @@ -61,7 +62,11 @@ static inline bool perf_paranoid_kernel(void) } int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ -int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */ + +/* + * max perf counter sample rate + */ +int sysctl_perf_counter_sample_rate __read_mostly = 100000; static atomic64_t perf_counter_id; @@ -1244,7 +1249,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(counter, 1); counter->pmu->unthrottle(counter); - interrupts = 2*sysctl_perf_counter_limit/HZ; + interrupts = 2*sysctl_perf_counter_sample_rate/HZ; } if (!counter->attr.freq || !counter->attr.sample_freq) @@ -1682,7 +1687,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) spin_lock_irq(&ctx->lock); if (counter->attr.freq) { - if (value > sysctl_perf_counter_limit) { + if (value > sysctl_perf_counter_sample_rate) { ret = -EINVAL; goto unlock; } @@ -2979,7 +2984,8 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi, } else { if (hwc->interrupts != MAX_INTERRUPTS) { hwc->interrupts++; - if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) { + if (HZ * hwc->interrupts > + (u64)sysctl_perf_counter_sample_rate) { hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(counter, 0); ret = 1; @@ -3639,6 +3645,11 @@ SYSCALL_DEFINE5(perf_counter_open, return -EACCES; } + if (attr.freq) { + if (attr.sample_freq > sysctl_perf_counter_sample_rate) + return -EINVAL; + } + /* * Get the target context (task or percpu): */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 344a65981de..9fd4e436b69 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -932,9 +932,9 @@ static struct ctl_table kern_table[] = { }, { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_int_limit", - .data = &sysctl_perf_counter_limit, - .maxlen = sizeof(sysctl_perf_counter_limit), + .procname = "perf_counter_max_sample_rate", + .data = &sysctl_perf_counter_sample_rate, + .maxlen = sizeof(sysctl_perf_counter_sample_rate), .mode = 0644, .proc_handler = &proc_dointvec, }, -- cgit v1.2.3-70-g09d2 From fcc8ac1825d3d0fb81f73bc1a80ebc863168bb56 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:24:17 +0100 Subject: tty: Add carrier processing on close to the tty_port core Some drivers implement this internally, others miss it out. Push the behaviour into the core code as that way everyone will do it consistently. Update the dtr rts method to raise or lower depending upon flags. Having a single method in this style fits most of the implementations more cleanly than two funtions. We need this in place before we tackle the USB side Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/epca.c | 4 ++-- drivers/char/isicom.c | 19 +++++++++++++------ drivers/char/istallion.c | 8 ++++---- drivers/char/mxser.c | 12 ++++++++---- drivers/char/pcmcia/synclink_cs.c | 11 +++++++---- drivers/char/rocket.c | 13 +++++++++---- drivers/char/stallion.c | 6 +++--- drivers/char/synclink.c | 9 ++++++--- drivers/char/synclink_gt.c | 9 ++++++--- drivers/char/synclinkmp.c | 9 ++++++--- drivers/char/tty_port.c | 27 +++++++++++++++++++++++---- include/linux/tty.h | 3 ++- 12 files changed, 89 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/char/epca.c b/drivers/char/epca.c index af7c13ca949..8797b774235 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port) return 0; } -static void epca_raise_dtr_rts(struct tty_port *port) +static void epca_dtr_rts(struct tty_port *port, int onoff) { } @@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = { static const struct tty_port_operations epca_port_ops = { .carrier_raised = epca_carrier_raised, - .raise_dtr_rts = epca_raise_dtr_rts, + .dtr_rts = epca_dtr_rts, }; static int info_open(struct tty_struct *tty, struct file *filp) diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index a59eac584d1..4d745a89504 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port) /* card->lock MUST NOT be held */ -static void isicom_raise_dtr_rts(struct tty_port *port) +static void isicom_dtr_rts(struct tty_port *port, int on) { struct isi_port *ip = container_of(port, struct isi_port, port); struct isi_board *card = ip->card; @@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port) if (!lock_card(card)) return; - outw(0x8000 | (channel << card->shift_count) | 0x02, base); - outw(0x0f04, base); - InterruptTheCard(base); - ip->status |= (ISI_DTR | ISI_RTS); + if (on) { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0f04, base); + InterruptTheCard(base); + ip->status |= (ISI_DTR | ISI_RTS); + } else { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0C04, base); + InterruptTheCard(base); + ip->status &= ~(ISI_DTR | ISI_RTS); + } unlock_card(card); } @@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = { static const struct tty_port_operations isicom_port_ops = { .carrier_raised = isicom_carrier_raised, - .raise_dtr_rts = isicom_raise_dtr_rts, + .dtr_rts = isicom_dtr_rts, }; static int __devinit reset_card(struct pci_dev *pdev, diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index fff19f7e29d..e18800c400b 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stli_raise_dtr_rts(struct tty_port *port) +static void stli_dtr_rts(struct tty_port *port, int on) { struct stliport *portp = container_of(port, struct stliport, port); struct stlibrd *brdp = stli_brds[portp->brdnr]; - stli_mkasysigs(&portp->asig, 1, 1); + stli_mkasysigs(&portp->asig, on, on); if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig, sizeof(asysigs_t), 0) < 0) - printk(KERN_WARNING "istallion: dtr raise failed.\n"); + printk(KERN_WARNING "istallion: dtr set failed.\n"); } @@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = { static const struct tty_port_operations stli_port_ops = { .carrier_raised = stli_carrier_raised, - .raise_dtr_rts = stli_raise_dtr_rts, + .dtr_rts = stli_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 13f8871e5b2..9533f43a30b 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port) return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0; } -static void mxser_raise_dtr_rts(struct tty_port *port) +static void mxser_dtr_rts(struct tty_port *port, int on) { struct mxser_port *mp = container_of(port, struct mxser_port, port); unsigned long flags; spin_lock_irqsave(&mp->slock, flags); - outb(inb(mp->ioaddr + UART_MCR) | - UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + if (on) + outb(inb(mp->ioaddr + UART_MCR) | + UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + else + outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS), + mp->ioaddr + UART_MCR); spin_unlock_irqrestore(&mp->slock, flags); } @@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = { struct tty_port_operations mxser_port_ops = { .carrier_raised = mxser_carrier_raised, - .raise_dtr_rts = mxser_raise_dtr_rts, + .dtr_rts = mxser_dtr_rts, }; /* diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 19d79fc5446..77b36488922 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info); static void tx_timeout(unsigned long context); static int carrier_raised(struct tty_port *port); -static void raise_dtr_rts(struct tty_port *port); +static void dtr_rts(struct tty_port *port, int onoff); #if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) @@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty, static const struct tty_port_operations mgslpc_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts + .dtr_rts = dtr_rts }; static int mgslpc_probe(struct pcmcia_device *link) @@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port) return 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int onoff) { MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (onoff) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR; set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index f59fc5cea06..7399188049d 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port) return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct r_port *info = container_of(port, struct r_port, port); - sSetDTR(&info->channel); - sSetRTS(&info->channel); + if (on) { + sSetDTR(&info->channel); + sSetRTS(&info->channel); + } else { + sClrDTR(&info->channel); + sClrRTS(&info->channel); + } } /* @@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = { static const struct tty_port_operations rocket_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 2ad813a801d..53e504f41b2 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stl_raise_dtr_rts(struct tty_port *port) +static void stl_dtr_rts(struct tty_port *port, int on) { struct stlport *portp = container_of(port, struct stlport, port); /* Takes brd_lock internally */ - stl_setsignals(portp, 1, 1); + stl_setsignals(portp, on, on); } /*****************************************************************************/ @@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = { static const struct tty_port_operations stl_port_ops = { .carrier_raised = stl_carrier_raised, - .raise_dtr_rts = stl_raise_dtr_rts, + .dtr_rts = stl_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afd0b26ca05..afded3a2379 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); unsigned long flags; spin_lock_irqsave(&info->irq_spinlock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); usc_set_serial_signals(info); spin_unlock_irqrestore(&info->irq_spinlock,flags); } @@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info ) static const struct tty_port_operations mgsl_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 5e256494686..67986ea0d47 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -3099,13 +3099,16 @@ static int carrier_raised(struct tty_port *port) return (info->signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { unsigned long flags; struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - info->signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3419,7 +3422,7 @@ static void add_device(struct slgt_info *info) static const struct tty_port_operations slgt_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 26de60efe4b..6f727e3c53a 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { SLMP_INFO *info = container_of(port, SLMP_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info) static const struct tty_port_operations port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* Allocate and initialize a device instance structure diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 9b8004c7268..926d4a5593f 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port) EXPORT_SYMBOL(tty_port_carrier_raised); /** - * tty_port_raise_dtr_rts - Riase DTR/RTS + * tty_port_raise_dtr_rts - Raise DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used @@ -147,11 +147,27 @@ EXPORT_SYMBOL(tty_port_carrier_raised); void tty_port_raise_dtr_rts(struct tty_port *port) { - if (port->ops->raise_dtr_rts) - port->ops->raise_dtr_rts(port); + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 1); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); +/** + * tty_port_lower_dtr_rts - Lower DTR/RTS + * @port: tty port + * + * Wrapper for the DTR/RTS raise logic. For the moment this is used + * to hide some internal details. This will eventually become entirely + * internal to the tty port. + */ + +void tty_port_lower_dtr_rts(struct tty_port *port) +{ + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 0); +} +EXPORT_SYMBOL(tty_port_lower_dtr_rts); + /** * tty_port_block_til_ready - Waiting logic for tty open * @port: the tty port being opened @@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts); * - port flags and counts * * The passed tty_port must implement the carrier_raised method if it can - * do carrier detect and the raise_dtr_rts method if it supports software + * do carrier detect and the dtr_rts method if it supports software * management of these lines. Note that the dtr/rts raise is done each * iteration as a hangup may have previously dropped them while we wait. */ @@ -302,6 +318,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) tty_ldisc_flush(tty); + if (tty->termios->c_cflag & HUPCL) + tty_port_lower_dtr_rts(port); + spin_lock_irqsave(&port->lock, flags); tty->closing = 0; diff --git a/include/linux/tty.h b/include/linux/tty.h index fc39db95499..98694364c96 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,7 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - void (*raise_dtr_rts)(struct tty_port *port); + void (*dtr_rts)(struct tty_port *port, int raise); }; struct tty_port { @@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); -- cgit v1.2.3-70-g09d2 From 1ec739be75a6cb961a46ba0b1982d0edb7f27558 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:25:25 +0100 Subject: tty: Implement a drain delay in the tty port We need this for devices that cannot flush and wait, but which do not order data and modem events. Without it we will hang up before all the data clears the hardware. Needed for the USB changes. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_port.c | 11 +++++++++++ include/linux/tty.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 926d4a5593f..4d08b6d27c2 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -308,6 +308,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f if (port->flags & ASYNC_INITIALIZED && port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); + if (port->drain_delay) { + unsigned int bps = tty_get_baud_rate(tty); + long timeout; + + if (bps > 1200) + timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps, + HZ / 10); + else + timeout = 2 * HZ; + schedule_timeout_interruptible(timeout); + } return 1; } EXPORT_SYMBOL(tty_port_close_start); diff --git a/include/linux/tty.h b/include/linux/tty.h index 98694364c96..bed5a3d4030 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -201,6 +201,9 @@ struct tty_port { unsigned char *xmit_buf; /* Optional buffer */ int close_delay; /* Close port delay */ int closing_wait; /* Delay for output */ + int drain_delay; /* Set to zero if no pure time + based drain is needed else + set to size of fifo */ }; /* -- cgit v1.2.3-70-g09d2 From 335f8514f200e63d689113d29cb7253a5c282967 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:26:29 +0100 Subject: tty: Bring the usb tty port structure into more use This allows us to clean stuff up, but is probably also going to cause some app breakage with buggy apps as we now implement proper POSIX behaviour for USB ports matching all the other ports. This does also mean other apps that break on USB will now work properly. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/belkin_sa.c | 6 +- drivers/usb/serial/ch341.c | 46 ++++++----- drivers/usb/serial/console.c | 6 +- drivers/usb/serial/cp210x.c | 6 +- drivers/usb/serial/cyberjack.c | 6 +- drivers/usb/serial/cypress_m8.c | 81 ++++++------------- drivers/usb/serial/digi_acceleport.c | 75 +++++------------- drivers/usb/serial/empeg.c | 6 +- drivers/usb/serial/ftdi_sio.c | 50 ++++++------ drivers/usb/serial/garmin_gps.c | 3 +- drivers/usb/serial/generic.c | 3 +- drivers/usb/serial/io_edgeport.c | 10 +-- drivers/usb/serial/io_ti.c | 3 +- drivers/usb/serial/ipaq.c | 6 +- drivers/usb/serial/ipw.c | 18 ++--- drivers/usb/serial/ir-usb.c | 6 +- drivers/usb/serial/iuu_phoenix.c | 25 +----- drivers/usb/serial/keyspan.c | 13 ++- drivers/usb/serial/keyspan.h | 8 +- drivers/usb/serial/keyspan_pda.c | 48 ++++++++---- drivers/usb/serial/kl5kusb105.c | 6 +- drivers/usb/serial/kobil_sct.c | 9 +-- drivers/usb/serial/mct_u232.c | 37 ++++----- drivers/usb/serial/mos7720.c | 3 +- drivers/usb/serial/mos7840.c | 48 +----------- drivers/usb/serial/navman.c | 3 +- drivers/usb/serial/omninet.c | 6 +- drivers/usb/serial/opticon.c | 3 +- drivers/usb/serial/option.c | 68 ++++++++-------- drivers/usb/serial/oti6858.c | 57 ++------------ drivers/usb/serial/pl2303.c | 79 ++++++++----------- drivers/usb/serial/sierra.c | 97 ++++++++++------------- drivers/usb/serial/spcp8x5.c | 85 ++++++++------------ drivers/usb/serial/symbolserial.c | 3 +- drivers/usb/serial/ti_usb_3410_5052.c | 6 +- drivers/usb/serial/usb-serial.c | 144 +++++++++++++++++++++++----------- drivers/usb/serial/visor.c | 6 +- drivers/usb/serial/whiteheat.c | 33 +------- include/linux/usb/serial.h | 10 ++- 39 files changed, 467 insertions(+), 661 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index b7eacad4d48..2bfd6dd85b5 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -93,8 +93,7 @@ static int belkin_sa_startup(struct usb_serial *serial); static void belkin_sa_shutdown(struct usb_serial *serial); static int belkin_sa_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void belkin_sa_close(struct usb_serial_port *port); static void belkin_sa_read_int_callback(struct urb *urb); static void belkin_sa_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios * old); @@ -244,8 +243,7 @@ exit: } /* belkin_sa_open */ -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void belkin_sa_close(struct usb_serial_port *port) { dbg("%s port %d", __func__, port->number); diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index ab4cc277aa6..2830766f5b3 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -262,13 +262,33 @@ error: kfree(priv); return r; } -static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static int ch341_carrier_raised(struct usb_serial_port *port) +{ + struct ch341_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & CH341_BIT_DCD) + return 1; + return 0; +} + +static void ch341_dtr_rts(struct usb_serial_port *port, int on) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; + dbg("%s - port %d", __func__, port->number); + /* drop DTR and RTS */ + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR; + else + priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR); + spin_unlock_irqrestore(&priv->lock, flags); + ch341_set_handshake(port->serial->dev, priv->line_control); + wake_up_interruptible(&priv->delta_msr_wait); +} + +static void ch341_close(struct usb_serial_port *port) +{ dbg("%s - port %d", __func__, port->number); /* shutdown our urbs */ @@ -276,18 +296,6 @@ static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port, usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - ch341_set_handshake(port->serial->dev, 0); - } - } - wake_up_interruptible(&priv->delta_msr_wait); } @@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, dbg("ch341_open()"); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(serial->dev, priv); if (r) @@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, if (r) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, r); - ch341_close(tty, port, NULL); + ch341_close(port); return -EPROTO; } @@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty, dbg("ch341_set_termios()"); - if (!tty || !tty->termios) - return; - baud_rate = tty_get_baud_rate(tty); priv->baud_rate = baud_rate; @@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = { .usb_driver = &ch341_driver, .num_ports = 1, .open = ch341_open, + .dtr_rts = ch341_dtr_rts, + .carrier_raised = ch341_carrier_raised, .close = ch341_close, .ioctl = ch341_ioctl, .set_termios = ch341_set_termios, diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 19e24045b13..247b61bfb7f 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options) kfree(tty); } } - + /* So we know not to kill the hardware on a hangup on this + port. We have also bumped the use count by one so it won't go + idle */ port->console = 1; retval = 0; @@ -182,7 +184,7 @@ free_tty: kfree(tty); reset_open_count: port->port.count = 0; -goto out; + goto out; } static void usb_console_write(struct console *co, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index e8d5133ce9c..d9f586dc6ec 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -36,8 +36,7 @@ static int cp2101_open(struct tty_struct *, struct usb_serial_port *, struct file *); static void cp2101_cleanup(struct usb_serial_port *); -static void cp2101_close(struct tty_struct *, struct usb_serial_port *, - struct file*); +static void cp2101_close(struct usb_serial_port *); static void cp2101_get_termios(struct tty_struct *, struct usb_serial_port *port); static void cp2101_get_termios_port(struct usb_serial_port *port, @@ -398,8 +397,7 @@ static void cp2101_cleanup(struct usb_serial_port *port) } } -static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void cp2101_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index dd501bb63ed..933ba913e66 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial); static void cyberjack_shutdown(struct usb_serial *serial); static int cyberjack_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cyberjack_close(struct usb_serial_port *port); static int cyberjack_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int cyberjack_write_room(struct tty_struct *tty); @@ -185,8 +184,7 @@ static int cyberjack_open(struct tty_struct *tty, return result; } -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cyberjack_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index e568710b263..669f9384853 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -174,8 +174,8 @@ static int cypress_ca42v2_startup(struct usb_serial *serial); static void cypress_shutdown(struct usb_serial *serial); static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cypress_close(struct usb_serial_port *port); +static void cypress_dtr_rts(struct usb_serial_port *port, int on); static int cypress_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void cypress_send(struct usb_serial_port *port); @@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty, priv->rx_flags = 0; spin_unlock_irqrestore(&priv->lock, flags); - /* raise both lines and set termios */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = CONTROL_DTR | CONTROL_RTS; - priv->cmd_ctrl = 1; - spin_unlock_irqrestore(&priv->lock, flags); + /* Set termios */ result = cypress_write(tty, port, NULL, 0); if (result) { @@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty, __func__, result); cypress_set_dead(port); } - + port->port.drain_delay = 256; return result; } /* cypress_open */ +static void cypress_dtr_rts(struct usb_serial_port *port, int on) +{ + struct cypress_private *priv = usb_get_serial_port_data(port); + /* drop dtr and rts */ + priv = usb_get_serial_port_data(port); + spin_lock_irq(&priv->lock); + if (on == 0) + priv->line_control = 0; + else + priv->line_control = CONTROL_DTR | CONTROL_RTS; + priv->cmd_ctrl = 1; + spin_unlock_irq(&priv->lock); + cypress_write(NULL, port, NULL, 0); +} -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cypress_close(struct usb_serial_port *port) { struct cypress_private *priv = usb_get_serial_port_data(port); - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from buffer */ - spin_lock_irq(&priv->lock); - timeout = CYPRESS_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (cypress_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - /* without mutex, allowed due to harmless failure mode */ - || port->serial->disconnected) - break; - spin_unlock_irq(&priv->lock); - timeout = schedule_timeout(timeout); - spin_lock_irq(&priv->lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ - cypress_buf_clear(priv->buf); - spin_unlock_irq(&priv->lock); - /* writing is potentially harmful, lock must be taken */ mutex_lock(&port->serial->disc_mutex); if (port->serial->disconnected) { mutex_unlock(&port->serial->disc_mutex); return; } - /* wait for characters to drain from device */ - if (tty) { - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ * 2560) / bps, HZ / 10); - else - timeout = 2 * HZ; - schedule_timeout_interruptible(timeout); - } - + cypress_buf_clear(priv->buf); dbg("%s - stopping urbs", __func__); usb_kill_urb(port->interrupt_in_urb); usb_kill_urb(port->interrupt_out_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop dtr and rts */ - priv = usb_get_serial_port_data(port); - spin_lock_irq(&priv->lock); - priv->line_control = 0; - priv->cmd_ctrl = 1; - spin_unlock_irq(&priv->lock); - cypress_write(tty, port, NULL, 0); - } - } if (stats) dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n", diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 38ba4ea8b6b..30f5140eff0 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -422,7 +422,6 @@ struct digi_port { int dp_throttled; int dp_throttle_restart; wait_queue_head_t dp_flush_wait; - int dp_in_close; /* close in progress */ wait_queue_head_t dp_close_wait; /* wait queue for close */ struct work_struct dp_wakeup_work; struct usb_serial_port *dp_port; @@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty); static int digi_chars_in_buffer(struct tty_struct *tty); static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void digi_close(struct usb_serial_port *port); +static int digi_carrier_raised(struct usb_serial_port *port); +static void digi_dtr_rts(struct usb_serial_port *port, int on); static int digi_startup_device(struct usb_serial *serial); static int digi_startup(struct usb_serial *serial); static void digi_shutdown(struct usb_serial *serial); @@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = { .num_ports = 3, .open = digi_open, .close = digi_close, + .dtr_rts = digi_dtr_rts, + .carrier_raised = digi_carrier_raised, .write = digi_write, .write_room = digi_write_room, .write_bulk_callback = digi_write_bulk_callback, @@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty) } +static void digi_dtr_rts(struct usb_serial_port *port, int on) +{ + /* Adjust DTR and RTS */ + digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1); +} + +static int digi_carrier_raised(struct usb_serial_port *port) +{ + struct digi_port *priv = usb_get_serial_port_data(port); + if (priv->dp_modem_signals & TIOCM_CD) + return 1; + return 0; +} static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) @@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, unsigned char buf[32]; struct digi_port *priv = usb_get_serial_port_data(port); struct ktermios not_termios; - unsigned long flags = 0; dbg("digi_open: TOP: port=%d, open_count=%d", priv->dp_port_num, port->port.count); @@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, if (digi_startup_device(port->serial) != 0) return -ENXIO; - spin_lock_irqsave(&priv->dp_port_lock, flags); - - /* don't wait on a close in progress for non-blocking opens */ - if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) { - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - return -EAGAIN; - } - - /* wait for a close in progress to finish */ - while (priv->dp_in_close) { - cond_wait_interruptible_timeout_irqrestore( - &priv->dp_close_wait, DIGI_RETRY_TIMEOUT, - &priv->dp_port_lock, flags); - if (signal_pending(current)) - return -EINTR; - spin_lock_irqsave(&priv->dp_port_lock, flags); - } - - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - /* read modem signals automatically whenever they change */ buf[0] = DIGI_CMD_READ_INPUT_SIGNALS; buf[1] = priv->dp_port_num; @@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, not_termios.c_iflag = ~tty->termios->c_iflag; digi_set_termios(tty, port, ¬_termios); } - - /* set DTR and RTS */ - digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1); - return 0; } -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void digi_close(struct usb_serial_port *port) { DEFINE_WAIT(wait); int ret; @@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, if (port->serial->disconnected) goto exit; - /* do cleanup only after final close on this port */ - spin_lock_irq(&priv->dp_port_lock); - priv->dp_in_close = 1; - spin_unlock_irq(&priv->dp_port_lock); - - /* tell line discipline to process only XON/XOFF */ - tty->closing = 1; - - /* wait for output to drain */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT); - - /* flush driver and line discipline buffers */ - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - if (port->serial->dev) { - /* wait for transmit idle */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); - /* drop DTR and RTS */ - digi_set_modem_signals(port, 0, 0); + /* FIXME: Transmit idle belongs in the wait_unti_sent path */ + digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); /* disable input flow control */ buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL; @@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, /* shutdown any outstanding bulk writes */ usb_kill_urb(port->write_urb); } - tty->closing = 0; exit: spin_lock_irq(&priv->dp_port_lock); priv->dp_write_urb_in_use = 0; - priv->dp_in_close = 0; wake_up_interruptible(&priv->dp_close_wait); spin_unlock_irq(&priv->dp_port_lock); mutex_unlock(&port->serial->disc_mutex); @@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial) priv->dp_throttled = 0; priv->dp_throttle_restart = 0; init_waitqueue_head(&priv->dp_flush_wait); - priv->dp_in_close = 0; init_waitqueue_head(&priv->dp_close_wait); INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock); priv->dp_port = serial->port[i]; diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index c709ec474a8..2b141ccb0cd 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -81,8 +81,7 @@ static int debug; /* function prototypes for an empeg-car player */ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void empeg_close(struct usb_serial_port *port); static int empeg_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, } -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void empeg_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d9fcdaedf38..d9d87111f9a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -719,8 +719,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port); static int ftdi_sio_port_remove(struct usb_serial_port *port); static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ftdi_close(struct usb_serial_port *port); +static void ftdi_dtr_rts(struct usb_serial_port *port, int on); static int ftdi_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int ftdi_write_room(struct tty_struct *tty); @@ -758,6 +758,7 @@ static struct usb_serial_driver ftdi_sio_device = { .port_remove = ftdi_sio_port_remove, .open = ftdi_open, .close = ftdi_close, + .dtr_rts = ftdi_dtr_rts, .throttle = ftdi_throttle, .unthrottle = ftdi_unthrottle, .write = ftdi_write, @@ -1558,6 +1559,30 @@ static int ftdi_open(struct tty_struct *tty, } /* ftdi_open */ +static void ftdi_dtr_rts(struct usb_serial_port *port, int on) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + char buf[1]; + + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* Disable flow control */ + if (!on && usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + FTDI_SIO_SET_FLOW_CTRL_REQUEST, + FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, + 0, priv->interface, buf, 0, + WDR_TIMEOUT) < 0) { + dev_err(&port->dev, "error from flowcontrol urb\n"); + } + /* drop RTS and DTR */ + if (on) + set_mctrl(port, TIOCM_DTR | TIOCM_RTS); + else + clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); + } + mutex_unlock(&port->serial->disc_mutex); +} /* * usbserial:__serial_close only calls ftdi_close if the point is open @@ -1567,31 +1592,12 @@ static int ftdi_open(struct tty_struct *tty, * */ -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ftdi_close(struct usb_serial_port *port) { /* ftdi_close */ - unsigned int c_cflag = tty->termios->c_cflag; struct ftdi_private *priv = usb_get_serial_port_data(port); - char buf[1]; dbg("%s", __func__); - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* Disable flow control */ - if (usb_control_msg(port->serial->dev, - usb_sndctrlpipe(port->serial->dev, 0), - FTDI_SIO_SET_FLOW_CTRL_REQUEST, - FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, - 0, priv->interface, buf, 0, - WDR_TIMEOUT) < 0) { - dev_err(&port->dev, "error from flowcontrol urb\n"); - } - - /* drop RTS and DTR */ - clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); - } /* Note change no line if hupcl is off */ - mutex_unlock(&port->serial->disc_mutex); /* cancel any scheduled reading */ cancel_delayed_work_sync(&priv->rx_work); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 586d30ff450..ee25a3fe3b0 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty, } -static void garmin_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void garmin_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct garmin_data *garmin_data_p = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 4cec9906ccf..be82ea95672 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial) } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); -void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +void usb_serial_generic_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); generic_cleanup(port); diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index fb4a73d090f..53ef5996e33 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb); /* function prototypes for the usbserial callbacks */ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void edge_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void edge_close(struct usb_serial_port *port); static int edge_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int edge_write_room(struct tty_struct *tty); @@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->txfifo.fifo) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->write_urb) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port) * edge_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 513b25e044c..eabf20eeb37 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2009,8 +2009,7 @@ release_es_lock: return status; } -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index cd62825a9ac..c610a99fa47 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -76,8 +76,7 @@ static int initial_wait; /* Function prototypes for an ipaq */ static int ipaq_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ipaq_close(struct usb_serial_port *port); static int ipaq_calc_num_ports(struct usb_serial *serial); static int ipaq_startup(struct usb_serial *serial); static void ipaq_shutdown(struct usb_serial *serial); @@ -714,8 +713,7 @@ error: } -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipaq_close(struct usb_serial_port *port) { struct ipaq_private *priv = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index da2a2b46644..29ad038b9c8 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty, return 0; } -static void ipw_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipw_dtr_rts(struct usb_serial_port *port, int on) { struct usb_device *dev = port->serial->dev; int result; - if (tty_hung_up_p(filp)) { - dbg("%s: tty_hung_up_p ...", __func__); - return; - } - /*--1: drop the dtr */ dbg("%s:dropping dtr", __func__); result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRDTR, + on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR, 0, NULL, 0, @@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty, result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRRTS, + on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS, 0, NULL, 0, @@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty, if (result < 0) dev_err(&port->dev, "dropping rts failed (error = %d)\n", result); +} +static void ipw_close(struct usb_serial_port *port) +{ + struct usb_device *dev = port->serial->dev; + int result; /*--3: purge */ dbg("%s:sending purge", __func__); @@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = { .num_ports = 1, .open = ipw_open, .close = ipw_close, + .dtr_rts = ipw_dtr_rts, .port_probe = ipw_probe, .port_remove = ipw_disconnect, .write = ipw_write, diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 4e2cda93da5..66009b6b763 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -88,8 +88,7 @@ static int xbof = -1; static int ir_startup (struct usb_serial *serial); static int ir_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filep); -static void ir_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filep); +static void ir_close(struct usb_serial_port *port); static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void ir_write_bulk_callback (struct urb *urb); @@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty, return result; } -static void ir_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file * filp) +static void ir_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 4473d442b2a..bb572cee6ec 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb); struct iuu_private { spinlock_t lock; /* store irq state */ wait_queue_head_t delta_msr_wait; - u8 line_control; u8 line_status; u8 termios_initialized; int tiostatus; /* store IUART SIGNAL for tiocmget call */ @@ -946,19 +945,10 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud, return status; } -static int set_control_lines(struct usb_device *dev, u8 value) -{ - return 0; -} - -static void iuu_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void iuu_close(struct usb_serial_port *port) { /* iuu_led (port,255,0,0,0); */ struct usb_serial *serial; - struct iuu_private *priv = usb_get_serial_port_data(port); - unsigned long flags; - unsigned int c_cflag; serial = port->serial; if (!serial) @@ -968,17 +958,6 @@ static void iuu_close(struct tty_struct *tty, iuu_uart_off(port); if (serial->dev) { - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } /* free writebuf */ /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); @@ -1154,7 +1133,7 @@ static int iuu_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - iuu_close(tty, port, NULL); + iuu_close(port); return -EPROTO; } else { dbg("%s - rxcmd OK", __func__); diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 00daa8f7759..f1195a98f31 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb) usb_kill_urb(urb); } -static void keyspan_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_dtr_rts(struct usb_serial_port *port, int on) +{ + struct keyspan_port_private *p_priv = usb_get_serial_port_data(port); + + p_priv->rts_state = on; + p_priv->dtr_state = on; + keyspan_send_setup(port, 0); +} + +static void keyspan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; @@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty, stop_urb(p_priv->out_urbs[i]); } } - tty_port_tty_set(&port->port, NULL); } /* download the firmware to a pre-renumeration device */ diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h index 38b4582e073..0d4569b6076 100644 --- a/drivers/usb/serial/keyspan.h +++ b/drivers/usb/serial/keyspan.h @@ -38,9 +38,8 @@ static int keyspan_open (struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void keyspan_close (struct tty_struct *tty, - struct usb_serial_port *port, - struct file *filp); +static void keyspan_close (struct usb_serial_port *port); +static void keyspan_dtr_rts (struct usb_serial_port *port, int on); static int keyspan_startup (struct usb_serial *serial); static void keyspan_shutdown (struct usb_serial *serial); static int keyspan_write_room (struct tty_struct *tty); @@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = { .num_ports = 1, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = { .num_ports = 2, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = { .num_ports = 4, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index bf1ae247da6..ab769dbea1b 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty) } +static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on) +{ + struct usb_serial *serial = port->serial; + + if (serial->dev) { + if (on) + keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2)); + else + keyspan_pda_set_modem_info(serial, 0); + } +} + +static int keyspan_pda_carrier_raised(struct usb_serial_port *port) +{ + struct usb_serial *serial = port->serial; + unsigned char modembits; + + /* If we can read the modem status and the DCD is low then + carrier is not raised yet */ + if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) { + if (!(modembits & (1>>6))) + return 0; + } + /* Carrier raised, or we failed (eg disconnected) so + progress accordingly */ + return 1; +} + + static int keyspan_pda_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { @@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty, priv->tx_room = room; priv->tx_throttled = room ? 0 : 1; - /* the normal serial device seems to always turn on DTR and RTS here, - so do the same */ - if (tty && (tty->termios->c_cflag & CBAUD)) - keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2)); - else - keyspan_pda_set_modem_info(serial, 0); - /*Start reading from the device*/ port->interrupt_in_urb->dev = serial->dev; rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); @@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty, error: return rc; } - - -static void keyspan_pda_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_pda_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; if (serial->dev) { - /* the normal serial device seems to always shut - off DTR and RTS now */ - if (tty->termios->c_cflag & HUPCL) - keyspan_pda_set_modem_info(serial, 0); - /* shutdown our bulk reads and writes */ usb_kill_urb(port->write_urb); usb_kill_urb(port->interrupt_in_urb); @@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = { .usb_driver = &keyspan_pda_driver, .id_table = id_table_std, .num_ports = 1, + .dtr_rts = keyspan_pda_dtr_rts, + .carrier_raised = keyspan_pda_carrier_raised, .open = keyspan_pda_open, .close = keyspan_pda_close, .write = keyspan_pda_write, diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index fcd9082f3e7..fa817c66b3e 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -76,8 +76,7 @@ static int klsi_105_startup(struct usb_serial *serial); static void klsi_105_shutdown(struct usb_serial *serial); static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void klsi_105_close(struct usb_serial_port *port); static int klsi_105_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void klsi_105_write_bulk_callback(struct urb *urb); @@ -447,8 +446,7 @@ exit: } /* klsi_105_open */ -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void klsi_105_close(struct usb_serial_port *port) { struct klsi_105_private *priv = usb_get_serial_port_data(port); int rc; diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index c148544953b..6b570498287 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -72,8 +72,7 @@ static int kobil_startup(struct usb_serial *serial); static void kobil_shutdown(struct usb_serial *serial); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void kobil_close(struct usb_serial_port *port); static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int kobil_write_room(struct tty_struct *tty); @@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial) for (i = 0; i < serial->num_ports; ++i) { while (serial->port[i]->port.count > 0) - kobil_close(NULL, serial->port[i], NULL); + kobil_close(serial->port[i]); kfree(usb_get_serial_port_data(serial->port[i])); usb_set_serial_port_data(serial->port[i], NULL); } @@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty, } -static void kobil_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void kobil_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); + /* FIXME: Add rts/dtr methods */ if (port->write_urb) { usb_kill_urb(port->write_urb); usb_free_urb(port->write_urb); diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 82930a7d509..873795548fc 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -95,8 +95,8 @@ static int mct_u232_startup(struct usb_serial *serial); static void mct_u232_shutdown(struct usb_serial *serial); static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void mct_u232_close(struct usb_serial_port *port); +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on); static void mct_u232_read_int_callback(struct urb *urb); static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); @@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = { .num_ports = 1, .open = mct_u232_open, .close = mct_u232_close, + .dtr_rts = mct_u232_dtr_rts, .throttle = mct_u232_throttle, .unthrottle = mct_u232_unthrottle, .read_int_callback = mct_u232_read_int_callback, @@ -496,29 +497,29 @@ error: return retval; } /* mct_u232_open */ - -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on) { - unsigned int c_cflag; unsigned int control_state; struct mct_u232_private *priv = usb_get_serial_port_data(port); - dbg("%s port %d", __func__, port->number); - if (tty) { - c_cflag = tty->termios->c_cflag; - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* drop DTR and RTS */ - spin_lock_irq(&priv->lock); + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* drop DTR and RTS */ + spin_lock_irq(&priv->lock); + if (on) + priv->control_state |= TIOCM_DTR | TIOCM_RTS; + else priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); - control_state = priv->control_state; - spin_unlock_irq(&priv->lock); - mct_u232_set_modem_ctrl(port->serial, control_state); - } - mutex_unlock(&port->serial->disc_mutex); + control_state = priv->control_state; + spin_unlock_irq(&priv->lock); + mct_u232_set_modem_ctrl(port->serial, control_state); } + mutex_unlock(&port->serial->disc_mutex); +} +static void mct_u232_close(struct usb_serial_port *port) +{ + dbg("%s port %d", __func__, port->number); if (port->serial->dev) { /* shutdown our urbs */ diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 24e3b5d4b4d..9e1a013ee7f 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty) return chars; } -static void mos7720_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7720_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7720_port; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 84fb1dcd30d..10b78a37214 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty) } -/************************************************************************ - * - * mos7840_block_until_tx_empty - * - * This function will block the close until one of the following: - * 1. TX count are 0 - * 2. The mos7840 has stopped - * 3. A timeout of 3 seconds without activity has expired - * - ************************************************************************/ -static void mos7840_block_until_tx_empty(struct tty_struct *tty, - struct moschip_port *mos7840_port) -{ - int timeout = HZ / 10; - int wait = 30; - int count; - - while (1) { - - count = mos7840_chars_in_buffer(tty); - - /* Check for Buffer status */ - if (count <= 0) - return; - - /* Block the thread for a while */ - interruptible_sleep_on_timeout(&mos7840_port->wait_chase, - timeout); - - /* No activity.. count down section */ - wait--; - if (wait == 0) { - dbg("%s - TIMEOUT", __func__); - return; - } else { - /* Reset timeout value back to seconds */ - wait = 30; - } - } -} - /***************************************************************************** * mos7840_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void mos7840_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7840_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7840_port; @@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty, } } - if (serial->dev) - /* flush and block until tx is empty */ - mos7840_block_until_tx_empty(tty, mos7840_port); - /* While closing port, shutdown all bulk read, write * * and interrupt read if they exists */ if (serial->dev) { diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index bcdcbb82270..f5f3751a888 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty, return result; } -static void navman_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void navman_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index df653971272..1104617334f 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -66,8 +66,7 @@ static int debug; /* function prototypes */ static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void omninet_close(struct usb_serial_port *port); static void omninet_read_bulk_callback(struct urb *urb); static void omninet_write_bulk_callback(struct urb *urb); static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, @@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty, return result; } -static void omninet_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void omninet_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); usb_kill_urb(port->read_urb); diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index b500ad10b75..c20480aa975 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void opticon_close(struct usb_serial_port *port) { struct opticon_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7817b82889c..a16d69fadba 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -45,8 +45,9 @@ /* Function prototypes */ static int option_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void option_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void option_close(struct usb_serial_port *port); +static void option_dtr_rts(struct usb_serial_port *port, int on); + static int option_startup(struct usb_serial *serial); static void option_shutdown(struct usb_serial *serial); static int option_write_room(struct tty_struct *tty); @@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty, static int option_tiocmget(struct tty_struct *tty, struct file *file); static int option_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); -static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *port); +static int option_send_setup(struct usb_serial_port *port); static int option_suspend(struct usb_serial *serial, pm_message_t message); static int option_resume(struct usb_serial *serial); @@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = { .num_ports = 1, .open = option_open, .close = option_close, + .dtr_rts = option_dtr_rts, .write = option_write, .write_room = option_write_room, .chars_in_buffer = option_chars_in_buffer, @@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty, dbg("%s", __func__); /* Doesn't support option setting */ tty_termios_copy_hw(tty->termios, old_termios); - option_send_setup(tty, port); + option_send_setup(port); } static int option_tiocmget(struct tty_struct *tty, struct file *file) @@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return option_send_setup(tty, port); + return option_send_setup(port); } /* Write */ @@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty, dbg("%s", __func__); - /* Set some sane defaults */ - portdata->rts_state = 1; - portdata->dtr_state = 1; - /* Reset low level data toggle and start reading from endpoints */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; @@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty, usb_pipeout(urb->pipe), 0); */ } - option_send_setup(tty, port); + option_send_setup(port); return 0; } -static void option_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void option_dtr_rts(struct usb_serial_port *port, int on) { - int i; struct usb_serial *serial = port->serial; struct option_port_private *portdata; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); + mutex_lock(&serial->disc_mutex); + portdata->rts_state = on; + portdata->dtr_state = on; + if (serial->dev) + option_send_setup(port); + mutex_unlock(&serial->disc_mutex); +} - portdata->rts_state = 0; - portdata->dtr_state = 0; - if (serial->dev) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - option_send_setup(tty, port); - mutex_unlock(&serial->disc_mutex); +static void option_close(struct usb_serial_port *port) +{ + int i; + struct usb_serial *serial = port->serial; + struct option_port_private *portdata; + + dbg("%s", __func__); + portdata = usb_get_serial_port_data(port); + if (serial->dev) { /* Stop reading/writing urbs */ for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); } - tty_port_tty_set(&port->port, NULL); } /* Helper functions used by option_setup_urbs */ @@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial) * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN * CDC. */ -static int option_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int option_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct option_port_private *portdata; int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; + int val = 0; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); - } - return 0; + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); } static int option_startup(struct usb_serial *serial) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index ba551f00f16..7de54781fe6 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -143,8 +143,7 @@ struct oti6858_control_pkt { /* function prototypes */ static int oti6858_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void oti6858_close(struct usb_serial_port *port); static void oti6858_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); static int oti6858_ioctl(struct tty_struct *tty, struct file *file, @@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty, if (result != 0) { dev_err(&port->dev, "%s(): usb_submit_urb() failed" " with error %d\n", __func__, result); - oti6858_close(tty, port, NULL); + oti6858_close(port); return -EPROTO; } /* setup termios */ if (tty) oti6858_set_termios(tty, port, &tmp_termios); - + port->port.drain_delay = 256; /* FIXME: check the FIFO length */ return 0; } -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void oti6858_close(struct usb_serial_port *port) { struct oti6858_private *priv = usb_get_serial_port_data(port); unsigned long flags; - long timeout; - wait_queue_t wait; dbg("%s(port = %d)", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = 30 * HZ; /* PL2303_CLOSING_WAIT */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - dbg("%s(): entering wait loop", __func__); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (oti6858_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - || port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - dbg("%s(): after wait loop", __func__); - /* clear out any remaining data in the buffer */ oti6858_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - /* FIXME - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps,HZ/10); - else - */ - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - dbg("%s(): after schedule_timeout_interruptible()", __func__); + dbg("%s(): after buf_clear()", __func__); /* cancel scheduled setup */ cancel_delayed_work(&priv->delayed_setup_work); @@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty, usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - /* - if (tty && (tty->termios->c_cflag) & HUPCL) { - // drop DTR and RTS - spin_lock_irqsave(&priv->lock, flags); - priv->pending_setup.control &= ~CONTROL_MASK; - spin_unlock_irqrestore(&priv->lock, flags); - } - */ } static int oti6858_tiocmset(struct tty_struct *tty, struct file *file, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 751a533a434..e02dc3d643c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty, kfree(buf); } -static void pl2303_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void pl2303_dtr_rts(struct usb_serial_port *port, int on) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + /* Change DTR and RTS */ + if (on) + priv->line_control |= (CONTROL_DTR | CONTROL_RTS); + else + priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + set_control_lines(port->serial->dev, control); +} + +static void pl2303_close(struct usb_serial_port *port) { struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = PL2303_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (pl2303_buf_data_avail(priv->buf) == 0 || - timeout == 0 || signal_pending(current) || - port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); /* clear out any remaining data in the buffer */ pl2303_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps, HZ/10); - else - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } } static int pl2303_open(struct tty_struct *tty, @@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } @@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file) return result; } +static int pl2303_carrier_raised(struct usb_serial_port *port) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & UART_DCD) + return 1; + return 0; +} + static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) { struct pl2303_private *priv = usb_get_serial_port_data(port); @@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = { .num_ports = 1, .open = pl2303_open, .close = pl2303_close, + .dtr_rts = pl2303_dtr_rts, + .carrier_raised = pl2303_carrier_raised, .write = pl2303_write, .ioctl = pl2303_ioctl, .break_ctl = pl2303_break_ctl, diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 913225c6161..1319b8968d8 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -240,57 +240,39 @@ struct sierra_port_private { int ri_state; }; -static int sierra_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int sierra_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; __u16 interface = 0; + int val = 0; dev_dbg(&port->dev, "%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; - - /* If composite device then properly report interface */ - if (serial->num_ports == 1) { - interface = sierra_calc_interface(serial); - - /* Control message is sent only to interfaces with - * interrupt_in endpoints - */ - if (port->interrupt_in_urb) { - /* send control message */ - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); - } - } - - /* Otherwise the need to do non-composite mapping */ - else { - if (port->bulk_out_endpointAddress == 2) - interface = 0; - else if (port->bulk_out_endpointAddress == 4) - interface = 1; - else if (port->bulk_out_endpointAddress == 5) - interface = 2; - - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); - - } + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; + + /* If composite device then properly report interface */ + if (serial->num_ports == 1) + interface = sierra_calc_interface(serial); + + /* Otherwise the need to do non-composite mapping */ + else { + if (port->bulk_out_endpointAddress == 2) + interface = 0; + else if (port->bulk_out_endpointAddress == 4) + interface = 1; + else if (port->bulk_out_endpointAddress == 5) + interface = 2; } - + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, interface, + NULL, 0, USB_CTRL_SET_TIMEOUT); return 0; } @@ -299,7 +281,7 @@ static void sierra_set_termios(struct tty_struct *tty, { dev_dbg(&port->dev, "%s", __func__); tty_termios_copy_hw(tty->termios, old_termios); - sierra_send_setup(tty, port); + sierra_send_setup(port); } static int sierra_tiocmget(struct tty_struct *tty, struct file *file) @@ -338,7 +320,7 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return sierra_send_setup(tty, port); + return sierra_send_setup(port); } static void sierra_outdat_callback(struct urb *urb) @@ -598,7 +580,7 @@ static int sierra_open(struct tty_struct *tty, } } - sierra_send_setup(tty, port); + sierra_send_setup(port); /* start up the interrupt endpoint if we have one */ if (port->interrupt_in_urb) { @@ -610,32 +592,38 @@ static int sierra_open(struct tty_struct *tty, return 0; } -static void sierra_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void sierra_dtr_rts(struct usb_serial_port *port, int on) { - int i; struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; - dev_dbg(&port->dev, "%s", __func__); portdata = usb_get_serial_port_data(port); - - portdata->rts_state = 0; - portdata->dtr_state = 0; + portdata->rts_state = on; + portdata->dtr_state = on; if (serial->dev) { mutex_lock(&serial->disc_mutex); if (!serial->disconnected) - sierra_send_setup(tty, port); + sierra_send_setup(port); mutex_unlock(&serial->disc_mutex); + } +} + +static void sierra_close(struct usb_serial_port *port) +{ + int i; + struct usb_serial *serial = port->serial; + struct sierra_port_private *portdata; + dev_dbg(&port->dev, "%s", __func__); + portdata = usb_get_serial_port_data(port); + + if (serial->dev) { /* Stop reading/writing urbs */ for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); } - usb_kill_urb(port->interrupt_in_urb); - tty_port_tty_set(&port->port, NULL); } static int sierra_startup(struct usb_serial *serial) @@ -737,6 +725,7 @@ static struct usb_serial_driver sierra_device = { .probe = sierra_probe, .open = sierra_open, .close = sierra_close, + .dtr_rts = sierra_dtr_rts, .write = sierra_write, .write_room = sierra_write_room, .set_termios = sierra_set_termios, diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 5e7528cc81a..8f7ed8f1399 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value, "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret); } +static int spcp8x5_carrier_raised(struct usb_serial_port *port) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & MSR_STATUS_LINE_DCD) + return 1; + return 0; +} + +static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control = MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS; + else + priv->line_control &= ~ (MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type); +} + /* close the serial port. We should wait for data sending to device 1st and * then kill all urb. */ -static void spcp8x5_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void spcp8x5_close(struct usb_serial_port *port) { struct spcp8x5_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; int result; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = SPCP8x5_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (ringbuf_avail_data(priv->buf) == 0 || - timeout == 0 || signal_pending(current)) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ clear_ringbuf(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device (this is long enough - * for the entire all byte spcp8x5 hardware buffer to drain with no - * flow control for data rates of 1200 bps or more, for lower rates we - * should really know how much data is in the buffer to compute a delay - * that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560) / bps, HZ/10); - else - timeout = 2*HZ; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(timeout); - - /* clear control lines */ - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type); - } - } - /* kill urb */ if (port->write_urb != NULL) { result = usb_unlink_urb(port->write_urb); @@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty, if (ret) return ret; - spin_lock_irqsave(&priv->lock, flags); - if (tty && (tty->termios->c_cflag & CBAUD)) - priv->line_control = MCR_DTR | MCR_RTS; - else - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type); /* Setup termios */ @@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty, port->read_urb->dev = serial->dev; ret = usb_submit_urb(port->read_urb, GFP_KERNEL); if (ret) { - spcp8x5_close(tty, port, NULL); + spcp8x5_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = { .num_ports = 1, .open = spcp8x5_open, .close = spcp8x5_close, + .dtr_rts = spcp8x5_dtr_rts, + .carrier_raised = spcp8x5_carrier_raised, .write = spcp8x5_write, .set_termios = spcp8x5_set_termios, .ioctl = spcp8x5_ioctl, diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 69879e43794..8b07ebc6bae 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void symbol_close(struct usb_serial_port *port) { struct symbol_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 0a64bac306e..42cb04c403b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial); static void ti_shutdown(struct usb_serial *serial); static int ti_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *file); -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file); +static void ti_close(struct usb_serial_port *port); static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count); static int ti_write_room(struct tty_struct *tty); @@ -647,8 +646,7 @@ release_lock: } -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file) +static void ti_close(struct usb_serial_port *port) { struct ti_device *tdev; struct ti_port *tport; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index f331e2bde88..1967a7edc10 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp) goto bailout_interface_put; mutex_unlock(&serial->disc_mutex); } - mutex_unlock(&port->mutex); - return 0; + /* Now do the correct tty layer semantics */ + retval = tty_port_block_til_ready(&port->port, tty, filp); + if (retval == 0) + return 0; bailout_interface_put: usb_autopm_put_interface(serial->interface); @@ -259,64 +261,89 @@ bailout_serial_put: return retval; } -static void serial_close(struct tty_struct *tty, struct file *filp) +/** + * serial_do_down - shut down hardware + * @port: port to shut down + * + * Shut down a USB port unless it is the console. We never shut down the + * console hardware as it will always be in use. + * + * Don't free any resources at this point + */ +static void serial_do_down(struct usb_serial_port *port) { - struct usb_serial_port *port = tty->driver_data; + struct usb_serial_driver *drv = port->serial->type; struct usb_serial *serial; struct module *owner; - int count; - if (!port) + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) return; - dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->mutex); serial = port->serial; owner = serial->type->driver.owner; - if (port->port.count == 0) { - mutex_unlock(&port->mutex); - return; - } - - if (port->port.count == 1) - /* only call the device specific close if this - * port is being closed by the last owner. Ensure we do - * this before we drop the port count. The call is protected - * by the port mutex - */ - serial->type->close(tty, port, filp); - - if (port->port.count == (port->console ? 2 : 1)) { - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty) { - /* We must do this before we drop the port count to - zero. */ - if (tty->driver_data) - tty->driver_data = NULL; - tty_port_tty_set(&port->port, NULL); - tty_kref_put(tty); - } - } + if (drv->close) + drv->close(port); - --port->port.count; - count = port->port.count; mutex_unlock(&port->mutex); - put_device(&port->dev); +} + +/** + * serial_do_free - free resources post close/hangup + * @port: port to free up + * + * Do the resource freeing and refcount dropping for the port. We must + * be careful about ordering and we must avoid freeing up the console. + */ +static void serial_do_free(struct usb_serial_port *port) +{ + struct usb_serial *serial; + struct module *owner; + + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) + return; + + serial = port->serial; + owner = serial->type->driver.owner; + put_device(&port->dev); /* Mustn't dereference port any more */ - if (count == 0) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - mutex_unlock(&serial->disc_mutex); - } + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + usb_autopm_put_interface(serial->interface); + mutex_unlock(&serial->disc_mutex); usb_serial_put(serial); - /* Mustn't dereference serial any more */ - if (count == 0) - module_put(owner); + module_put(owner); +} + +static void serial_close(struct tty_struct *tty, struct file *filp) +{ + struct usb_serial_port *port = tty->driver_data; + + dbg("%s - port %d", __func__, port->number); + + + if (tty_port_close_start(&port->port, tty, filp) == 0) + return; + + serial_do_down(port); + tty_port_close_end(&port->port, tty); + tty_port_tty_set(&port->port, NULL); + serial_do_free(port); +} + +static void serial_hangup(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + serial_do_down(port); + tty_port_hangup(&port->port); + serial_do_free(port); } static int serial_write(struct tty_struct *tty, const unsigned char *buf, @@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device( return NULL; } +static int serial_carrier_raised(struct tty_port *port) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->carrier_raised) + return drv->carrier_raised(p); + /* No carrier control - don't block */ + return 1; +} + +static void serial_dtr_rts(struct tty_port *port, int on) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->dtr_rts) + drv->dtr_rts(p, on); +} + +static const struct tty_port_operations serial_port_ops = { + .carrier_raised = serial_carrier_raised, + .dtr_rts = serial_dtr_rts, +}; + int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { @@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface, if (!port) goto probe_error; tty_port_init(&port->port); + port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); mutex_init(&port->mutex); @@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface) if (port) { struct tty_struct *tty = tty_port_tty_get(&port->port); if (tty) { + /* The hangup will occur asynchronously but + the object refcounts will sort out all the + cleanup */ tty_hangup(tty); tty_kref_put(tty); } @@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, + .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, @@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = { .proc_fops = &serial_proc_fops, }; + struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 5ac414bda71..b15f1c0e1d4 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -38,8 +38,7 @@ /* function prototypes for a handspring visor */ static int visor_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void visor_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void visor_close(struct usb_serial_port *port); static int visor_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int visor_write_room(struct tty_struct *tty); @@ -324,8 +323,7 @@ exit: } -static void visor_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void visor_close(struct usb_serial_port *port) { struct visor_private *priv = usb_get_serial_port_data(port); unsigned char *transfer_buffer; diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 5335d3211c0..7c7295d09f3 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -147,8 +147,7 @@ static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_shutdown(struct usb_serial *serial); static int whiteheat_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void whiteheat_close(struct usb_serial_port *port); static int whiteheat_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -712,8 +711,7 @@ exit: } -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void whiteheat_close(struct usb_serial_port *port) { struct whiteheat_private *info = usb_get_serial_port_data(port); struct whiteheat_urb_wrap *wrap; @@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty, dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->serial->disc_mutex); - /* filp is NULL when called from usb_serial_disconnect */ - if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) { - mutex_unlock(&port->serial->disc_mutex); - return; - } - mutex_unlock(&port->serial->disc_mutex); - - tty->closing = 1; - -/* - * Not currently in use; tty_wait_until_sent() calls - * serial_chars_in_buffer() which deadlocks on the second semaphore - * acquisition. This should be fixed at some point. Greg's been - * notified. - if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) { - tty_wait_until_sent(tty, CLOSING_DELAY); - } -*/ - - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - firm_report_tx_done(port); - firm_close(port); /* shutdown our bulk reads and writes */ @@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty, } spin_unlock_irq(&info->lock); mutex_unlock(&info->deathwarrant); - stop_command_port(port->serial); - - tty->closing = 0; } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 625e9e4639c..8cdfed738fe 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -224,8 +224,7 @@ struct usb_serial_driver { /* Called by console with tty = NULL and by tty */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); - void (*close)(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); + void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ @@ -241,6 +240,10 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + /* Called by the tty layer for port level work. There may or may not + be an attached tty at this point */ + void (*dtr_rts)(struct usb_serial_port *port, int on); + int (*carrier_raised)(struct usb_serial_port *port); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); @@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); -extern void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +extern void usb_serial_generic_close(struct usb_serial_port *port); extern int usb_serial_generic_resume(struct usb_serial *serial); extern int usb_serial_generic_write_room(struct tty_struct *tty); extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); -- cgit v1.2.3-70-g09d2 From 97e87f8ebe978e881c7325ba490574bd5500b133 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:29:27 +0100 Subject: tty: cyclades, plx9060 casts cleanup Remove ugly all-over-the-code casts of ctl_addr to 9060 space. Add an union to the cyclades_card structure, which contains a pointer to both 9050 and 9060 spaces. The 9050 space layout is unknown, so let it still as a void __iomem pointer. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/cyclades.c | 65 ++++++++++++++++++++++++------------------------ include/linux/cyclades.h | 23 +++++++++-------- 2 files changed, 46 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index ccf68a9e24b..2cbf74134f1 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -666,12 +666,10 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1) #define Z_FPGA_CHECK(card) \ - ((readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0) + ((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0) -#define ISZLOADED(card) (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->mail_box_0)) || \ - Z_FPGA_CHECK(card)) && \ +#define ISZLOADED(card) (((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \ + || Z_FPGA_CHECK(card)) && \ (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \ ((card).base_addr+ID_ADDRESS))->signature))) @@ -1400,14 +1398,12 @@ cyz_fetch_msg(struct cyclades_card *cinfo, zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; - loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->loc_doorbell); + loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell); if (loc_doorbell) { *cmd = (char)(0xff & loc_doorbell); *channel = readl(&board_ctrl->fwcmd_channel); *param = (__u32) readl(&board_ctrl->fwcmd_param); - cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - loc_doorbell, 0xffffffff); + cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff); return 1; } return 0; @@ -1431,8 +1427,7 @@ cyz_issue_cmd(struct cyclades_card *cinfo, board_ctrl = &zfw_ctrl->board_ctrl; index = 0; - pci_doorbell = - &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell; + pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell; while ((readl(pci_doorbell) & 0xff) != 0) { if (index++ == 1000) return (int)(readl(pci_doorbell) & 0xff); @@ -1635,8 +1630,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; fw_ver = readl(&board_ctrl->fw_version); - hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - mail_box_0); + hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0); while (cyz_fetch_msg(cinfo, &channel, &cmd, ¶m) == 1) { special_count = 0; @@ -2394,8 +2388,8 @@ static int cy_open(struct tty_struct *tty, struct file *filp) struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS; if (!ISZLOADED(*cinfo)) { - if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->mail_box_0)) && + if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060-> + mail_box_0)) && Z_FPGA_CHECK(*cinfo)) && (ZFIRM_HLT == readl( &firm_id->signature))) { @@ -2417,6 +2411,7 @@ static int cy_open(struct tty_struct *tty, struct file *filp) if (!cinfo->intr_enabled) { struct ZFW_CTRL __iomem *zfw_ctrl; struct BOARD_CTRL __iomem *board_ctrl; + u16 intr; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & @@ -2425,8 +2420,10 @@ static int cy_open(struct tty_struct *tty, struct file *filp) board_ctrl = &zfw_ctrl->board_ctrl; /* Enable interrupts on the PLX chip */ - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) | 0x0900); + intr = readw(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat) | 0x0900; + cy_writew(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat, intr); /* Enable interrupts on the FW */ retval = cyz_issue_cmd(cinfo, 0, C_CM_IRQ_ENBL, 0L); @@ -4347,8 +4344,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) spin_lock_init(&cinfo->card_lock); if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */ - mailbox = readl(&((struct RUNTIME_9060 __iomem *) - cinfo->ctl_addr)->mail_box_0); + mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0); nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8; cinfo->intr_enabled = 0; cinfo->nports = 0; /* Will be correctly set later, after @@ -4613,7 +4609,7 @@ static int __init cy_detect_isa(void) /* set cy_card */ cy_card[j].base_addr = cy_isa_address; - cy_card[j].ctl_addr = NULL; + cy_card[j].ctl_addr.p9050 = NULL; cy_card[j].irq = (int)cy_isa_irq; cy_card[j].bus_index = 0; cy_card[j].first_line = cy_next_channel; @@ -5013,7 +5009,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } /* Disable interrupts on the PLX before resetting it */ - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) & ~0x0900); plx_init(pdev, irq, addr0); @@ -5109,7 +5106,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, /* set cy_card */ cy_card[card_no].base_addr = addr2; - cy_card[card_no].ctl_addr = addr0; + cy_card[card_no].ctl_addr.p9050 = addr0; cy_card[card_no].irq = irq; cy_card[card_no].bus_index = 1; cy_card[card_no].first_line = cy_next_channel; @@ -5125,17 +5122,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, plx_ver = readb(addr2 + CyPLX_VER) & 0x0f; switch (plx_ver) { case PLX_9050: - cy_writeb(addr0 + 0x4c, 0x43); break; case PLX_9060: case PLX_9080: default: /* Old boards, use PLX_9060 */ - plx_init(pdev, irq, addr0); - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900); + { + struct RUNTIME_9060 __iomem *ctl_addr = addr0; + plx_init(pdev, irq, ctl_addr); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) | 0x0900); break; } + } } dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from " @@ -5168,17 +5168,18 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev) /* non-Z with old PLX */ if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == PLX_9050) - cy_writeb(cinfo->ctl_addr + 0x4c, 0); + cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0); else #ifndef CONFIG_CYZ_INTR if (!IS_CYC_Z(*cinfo)) #endif - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) & ~0x0900); + cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat, + readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) & + ~0x0900); iounmap(cinfo->base_addr); - if (cinfo->ctl_addr) - iounmap(cinfo->ctl_addr); + if (cinfo->ctl_addr.p9050) + iounmap(cinfo->ctl_addr.p9050); if (cinfo->irq #ifndef CONFIG_CYZ_INTR && !IS_CYC_Z(*cinfo) @@ -5373,8 +5374,8 @@ static void __exit cy_cleanup_module(void) /* clear interrupt */ cy_writeb(card->base_addr + Cy_ClrIntr, 0); iounmap(card->base_addr); - if (card->ctl_addr) - iounmap(card->ctl_addr); + if (card->ctl_addr.p9050) + iounmap(card->ctl_addr.p9050); if (card->irq #ifndef CONFIG_CYZ_INTR && !IS_CYC_Z(*card) diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 788850ba4e7..9ae03d5b359 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -507,16 +507,19 @@ struct ZFW_CTRL { /* Per card data structure */ struct cyclades_card { - void __iomem *base_addr; - void __iomem *ctl_addr; - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - spinlock_t card_lock; - struct cyclades_port *ports; + void __iomem *base_addr; + union { + void __iomem *p9050; + struct RUNTIME_9060 __iomem *p9060; + } ctl_addr; + int irq; + unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ + unsigned int first_line; /* minor number of first channel on card */ + unsigned int nports; /* Number of ports in the card */ + int bus_index; /* address shift - 0 for ISA, 1 for PCI */ + int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + spinlock_t card_lock; + struct cyclades_port *ports; }; /*************************************** -- cgit v1.2.3-70-g09d2 From 101b81590d8df0a74c33cf739886247c0a13f4af Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:30:10 +0100 Subject: tty: cyclades, cache HW version Store HW version locally to not read it all the time in interrupts and alike. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/cyclades.c | 32 +++++++++++--------------------- include/linux/cyclades.h | 1 + 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 2cbf74134f1..cf191cc1c92 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -668,8 +668,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define Z_FPGA_CHECK(card) \ ((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0) -#define ISZLOADED(card) (((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \ - || Z_FPGA_CHECK(card)) && \ +#define ISZLOADED(card) ((ZO_V1 == (card).hw_ver || Z_FPGA_CHECK(card)) && \ (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \ ((card).base_addr+ID_ADDRESS))->signature))) @@ -1393,8 +1392,6 @@ cyz_fetch_msg(struct cyclades_card *cinfo, unsigned long loc_doorbell; firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) - return -1; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; @@ -1619,10 +1616,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) static struct BOARD_CTRL __iomem *board_ctrl; static struct CH_CTRL __iomem *ch_ctrl; static struct BUF_CTRL __iomem *buf_ctrl; - __u32 channel; + __u32 channel, param, fw_ver; __u8 cmd; - __u32 param; - __u32 hw_ver, fw_ver; int special_count; int delta_count; @@ -1630,7 +1625,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; fw_ver = readl(&board_ctrl->fw_version); - hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0); while (cyz_fetch_msg(cinfo, &channel, &cmd, ¶m) == 1) { special_count = 0; @@ -2388,11 +2382,9 @@ static int cy_open(struct tty_struct *tty, struct file *filp) struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS; if (!ISZLOADED(*cinfo)) { - if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060-> - mail_box_0)) && - Z_FPGA_CHECK(*cinfo)) && - (ZFIRM_HLT == readl( - &firm_id->signature))) { + if (cinfo->hw_ver == ZE_V1 && Z_FPGA_CHECK(*cinfo) && + readl(&firm_id->signature) == + ZFIRM_HLT) { printk(KERN_ERR "cyc:Cyclades-Z Error: you " "need an external power supply for " "this number of ports.\nFirmware " @@ -4336,7 +4328,6 @@ static void cy_hangup(struct tty_struct *tty) static int __devinit cy_init_card(struct cyclades_card *cinfo) { struct cyclades_port *info; - u32 uninitialized_var(mailbox); unsigned int nports, port; unsigned short chip_number; int uninitialized_var(index); @@ -4344,8 +4335,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) spin_lock_init(&cinfo->card_lock); if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */ - mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0); - nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8; + nports = (cinfo->hw_ver == ZE_V1) ? ZE_V1_NPORTS : 8; cinfo->intr_enabled = 0; cinfo->nports = 0; /* Will be correctly set later, after Z FW is loaded */ @@ -4377,7 +4367,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) if (IS_CYC_Z(*cinfo)) { info->type = PORT_STARTECH; - if (mailbox == ZO_V1) + if (cinfo->hw_ver == ZO_V1) info->xmit_fifo_size = CYZ_FIFO_SIZE; else info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE; @@ -4932,7 +4922,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, { void __iomem *addr0 = NULL, *addr2 = NULL; char *card_name = NULL; - u32 mailbox; + u32 uninitialized_var(mailbox); unsigned int device_id, nchan = 0, card_no, i; unsigned char plx_ver; int retval, irq; @@ -5014,7 +5004,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, plx_init(pdev, irq, addr0); - mailbox = (u32)readl(&ctl_addr->mail_box_0); + mailbox = readl(&ctl_addr->mail_box_0); addr2 = ioremap_nocache(pci_resource_start(pdev, 2), mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin); @@ -5026,7 +5016,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, if (mailbox == ZE_V1) { card_name = "Cyclades-Ze"; - readl(&ctl_addr->mail_box_0); nchan = ZE_V1_NPORTS; } else { card_name = "Cyclades-8Zo"; @@ -5089,6 +5078,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } cy_card[card_no].num_chips = nchan / 4; } else { + cy_card[card_no].hw_ver = mailbox; + cy_card[card_no].num_chips = (unsigned int)-1; #ifdef CONFIG_CYZ_INTR /* allocate IRQ only if board has an IRQ */ if (irq != 0 && irq != 255) { @@ -5101,7 +5092,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } } #endif /* CONFIG_CYZ_INTR */ - cy_card[card_no].num_chips = (unsigned int)-1; } /* set cy_card */ diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 9ae03d5b359..a14fe307976 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -518,6 +518,7 @@ struct cyclades_card { unsigned int nports; /* Number of ports in the card */ int bus_index; /* address shift - 0 for ISA, 1 for PCI */ int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + u32 hw_ver; spinlock_t card_lock; struct cyclades_port *ports; }; -- cgit v1.2.3-70-g09d2 From 08a951e1693e324bd035b194002a82b9057fd9c5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:33:40 +0100 Subject: tty: cyclades, remove typedefs They are unused anyway. Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/cyclades.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index a14fe307976..1fbdea4f08e 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL { #ifndef DP_WINDOW_SIZE -/* #include "cyclomz.h" */ -/****************** ****************** *******************/ -/* - * The data types defined below are used in all ZFIRM interface - * data structures. They accomodate differences between HW - * architectures and compilers. - */ - -typedef __u64 ucdouble; /* 64 bits, unsigned */ -typedef __u32 uclong; /* 32 bits, unsigned */ -typedef __u16 ucshort; /* 16 bits, unsigned */ -typedef __u8 ucchar; /* 8 bits, unsigned */ - /* * Memory Window Sizes */ -- cgit v1.2.3-70-g09d2 From 70beaed22cbe12979e55d99b370e147e2e168562 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 11 Jun 2009 12:39:12 +0100 Subject: serial: refactor ASYNC_ flags Define ASYNCB_* flags which are bit numbers of the ASYNC_* flags. This is useful for {test,set,clear}_bit. Also convert each ASYNC_% to be (1 << ASYNCB_%) and define masks with the macros, not constants. Tested with: #include "PATH_TO_KERNEL/include/linux/serial.h" static struct { unsigned int new, old; } as[] = { { ASYNC_HUP_NOTIFY, 0x0001 }, { ASYNC_FOURPORT, 0x0002 }, ... { ASYNC_BOOT_ONLYMCA, 0x00400000 }, { ASYNC_INTERNAL_FLAGS, 0xFFC00000 } }; ... for (a = 0; a < ARRAY_SIZE(as); a++) if (as[a].old != as[a].new) printf("%.8x != %.8x\n", as[a].old, as[a].new); Signed-off-by: Jiri Slaby Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/serial.h | 116 +++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 9136cc5608c..e5bb75a6380 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -96,54 +96,76 @@ struct serial_uart_config { /* * Definitions for async_struct (and serial_struct) flags field + * + * Define ASYNCB_* for convenient use with {test,set,clear}_bit. */ -#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ASYNC_SPD_MASK 0x1030 -#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ - -#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ -#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ - -#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ - -#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety - * checks. Note: can be dangerous! */ - -#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ - -#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ -#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ -#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ -#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards - --- no longer used */ -#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ - -#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ -#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ +#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes + * on the callout port */ +#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ +#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ +#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */ +#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ +#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ +#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during + * autoconfiguration */ +#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */ +#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */ +#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */ +#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */ +#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */ +#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */ +#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety + * checks. Note: can be dangerous! */ +#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */ +#define ASYNCB_LAST_USER 15 + +/* Internal flags used only by kernel */ +#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */ +#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */ +#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */ +#define ASYNCB_CLOSING 27 /* Serial port is closing */ +#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */ +#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */ +#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */ +#define ASYNCB_CONS_FLOW 23 /* flow control for console */ +#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */ +#define ASYNCB_FIRST_KERNEL 22 + +#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY) +#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT) +#define ASYNC_SAK (1U << ASYNCB_SAK) +#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS) +#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI) +#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI) +#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST) +#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ) +#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT) +#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT) +#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP) +#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD) +#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI) +#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY) +#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART) +#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE) + +#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1) +#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \ + ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY) +#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI) +#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI) +#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI) + +#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED) +#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE) +#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF) +#define ASYNC_CLOSING (1U << ASYNCB_CLOSING) +#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW) +#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD) +#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ) +#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW) +#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA) +#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1)) /* * Multiport serial configuration structure --- external structure -- cgit v1.2.3-70-g09d2 From 70fd8fdecc4430ffcede7704dd812d4054d1faf9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 11 Jun 2009 12:41:57 +0100 Subject: 8250_pci: add the OXCB950 chip to the 8250 PCI driver. This adds support for the following serial controller chip: Oxford Semiconductor OXCB950 for PCI Cardbus interface http://www.transdimension.com/products/serial/OXCB950.html on this card: ExSys EX-1370 1 port high-speed serial card for ExpressCard/34 slot Signed-off-by: Andre Przywara Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/8250_pci.c | 3 +++ include/linux/pci_ids.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 938bc1b6c3f..e371a9c1534 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_OXSEMI, 0x950a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_2_1130000 }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950, + PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0, + pbn_b0_1_921600 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_4_115200 }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d..d7d1c41a0b1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1996,10 +1996,12 @@ #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_C950 0x950B #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001 #define PCI_VENDOR_ID_CHELSIO 0x1425 -- cgit v1.2.3-70-g09d2 From 38db89799bdf11625a831c5af33938dcb11908b6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:44:17 +0100 Subject: tty: throttling race fix The tty throttling code can race due to the lock drops. It takes very high loads but this has been observed and verified by Rob Duncan. The basic problem is that on an SMP box we can go CPU #1 CPU #2 need to throttle ? suppose we should buffer space cleared are we throttled yes ? - unthrottle call throttle method This changeet take the termios lock to protect against this. The termios lock isn't the initial obvious candidate but many implementations of throttle methods already need to poke around their own termios structures (and nobody really locks them against a racing change of flow control). This does mean that anyone who is setting tty->low_latency = 1 and then calling tty_flip_buffer_push from their unthrottle method is going to end up collapsing in a pile of locks. However we've removed all the known bogus users of low_latency = 1 and such use isn't safe anyway for other reasons so catching it would be an improvement. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_ioctl.c | 13 +++++++++++++ include/linux/tty_driver.h | 6 ++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index 6f4c7d0a53b..2401dbcbee9 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer); * @tty: terminal * * Indicate that a tty should stop transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. */ void tty_throttle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->ops->throttle) tty->ops->throttle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_throttle); @@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle); * @tty: terminal * * Indicate that a tty may continue transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. + * + * Drivers should however remember that the stack can issue a throttle, + * then change flow control method, then unthrottle. */ void tty_unthrottle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_unthrottle); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index bcba84ea2d8..3566129384a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -127,7 +127,8 @@ * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. * - * Optional: Always invoke via tty_throttle(); + * Optional: Always invoke via tty_throttle(), called under the + * termios lock. * * void (*unthrottle)(struct tty_struct * tty); * @@ -135,7 +136,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * - * Optional: Always invoke via tty_unthrottle(); + * Optional: Always invoke via tty_unthrottle(), called under the + * termios lock. * * void (*stop)(struct tty_struct *tty); * -- cgit v1.2.3-70-g09d2 From e8b70e7d3e86319a8b2aaabde3866833d92cd80f Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:48:02 +0100 Subject: tty: Extract various bits of ldisc code Before trying to tackle the ldisc bugs the code needs to be a good deal more readable, so do the simple extractions of routines first. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 24 +++++++++--- drivers/char/tty_ldisc.c | 97 ++++++++++++++++++++++++++++++++---------------- include/linux/tty.h | 3 ++ 3 files changed, 88 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 6c817398232..be49d0730bb 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2481,6 +2481,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int return tty->ops->tiocmset(tty, file, set, clear); } +struct tty_struct *tty_pair_get_tty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + tty = tty->link; + return tty; +} +EXPORT_SYMBOL(tty_pair_get_tty); + +struct tty_struct *tty_pair_get_pty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + return tty; + return tty->link; +} +EXPORT_SYMBOL(tty_pair_get_pty); + /* * Split this up, as gcc can choke on it otherwise.. */ @@ -2496,11 +2514,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - real_tty = tty; - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->driver->subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - + real_tty = tty_pair_get_tty(tty); /* * Factor out some common prep work diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index f78f5b0127a..e3c6416aa86 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -443,6 +443,50 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) } } +/** + * tty_ldisc_halt - shutdown the line discipline + * @tty: tty device + * + * Shut down the line discipline and work queue for this tty device. + * The TTY_LDISC flag being cleared ensures no further references can + * be obtained while the delayed work queue halt ensures that no more + * data is fed to the ldisc. + * + * In order to wait for any existing references to complete see + * tty_ldisc_wait_idle. + */ + +static void tty_ldisc_halt(struct tty_struct *tty) +{ + clear_bit(TTY_LDISC, &tty->flags); + cancel_delayed_work(&tty->buf.work); + /* + * Wait for ->hangup_work and ->buf.work handlers to terminate + */ + flush_scheduled_work(); +} + +/** + * tty_ldisc_wait_idle - wait for the ldisc to become idle + * @tty: tty to wait for + * + * Wait for the line discipline to become idle. The discipline must + * have been halted for this to guarantee it remains idle. + * + */ + +static void tty_ldisc_wait_idle(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&tty_ldisc_lock, flags); + while (tty->ldisc.refcount) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); + spin_lock_irqsave(&tty_ldisc_lock, flags); + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); +} + /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -636,6 +680,21 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) return 0; } +static void tty_ldisc_reinit(struct tty_struct *tty) +{ + struct tty_ldisc ld; + + if (tty->ldisc.ops->close) + (tty->ldisc.ops->close)(tty); + tty_ldisc_put(tty->ldisc.ops); + /* + * Switch the line discipline back + */ + WARN_ON(tty_ldisc_get(N_TTY, &ld)); + tty_ldisc_assign(tty, &ld); + tty_set_termios_ldisc(tty, N_TTY); +} + /** * tty_ldisc_release - release line discipline * @tty: tty being shut down @@ -647,58 +706,34 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) { - unsigned long flags; - struct tty_ldisc ld; + /* * Prevent flush_to_ldisc() from rescheduling the work for later. Then * kill any delayed work. As this is the final close it does not * race with the set_ldisc code path. */ - clear_bit(TTY_LDISC, &tty->flags); - cancel_delayed_work(&tty->buf.work); - /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - */ - - flush_scheduled_work(); + tty_ldisc_halt(tty); /* * Wait for any short term users (we know they are just driver * side waiters as the file is closing so user count on the file * side is zero. */ - spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + tty_ldisc_wait_idle(tty); + /* * Shutdown the current line discipline, and reset it to N_TTY. * * FIXME: this MUST get fixed for the new reflocking */ - if (tty->ldisc.ops->close) - (tty->ldisc.ops->close)(tty); - tty_ldisc_put(tty->ldisc.ops); - /* - * Switch the line discipline back - */ - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(tty, &ld); - tty_set_termios_ldisc(tty, N_TTY); + tty_ldisc_reinit(tty); if (o_tty) { /* FIXME: could o_tty be in setldisc here ? */ clear_bit(TTY_LDISC, &o_tty->flags); - if (o_tty->ldisc.ops->close) - (o_tty->ldisc.ops->close)(o_tty); - tty_ldisc_put(o_tty->ldisc.ops); - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(o_tty, &ld); - tty_set_termios_ldisc(o_tty, N_TTY); + tty_ldisc_reinit(o_tty); } } diff --git a/include/linux/tty.h b/include/linux/tty.h index bed5a3d4030..f9c13c83790 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -428,6 +428,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, extern void tty_release_dev(struct file *filp); extern int tty_init_termios(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); + extern struct mutex tty_mutex; extern void tty_write_unlock(struct tty_struct *tty); -- cgit v1.2.3-70-g09d2 From c65c9bc3efa5589f691276bb9db689119a711222 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 11 Jun 2009 12:50:12 +0100 Subject: tty: rewrite the ldisc locking There are several pretty much unfixable races in the old ldisc code, especially with respect to pty behaviour and also to hangup. It's easier to rewrite the code than simply try and patch it up. This patch - splits the ldisc from the tty (so we will be able to refcount it more cleanly later) - introduces a mutex lock for ldisc changing on an active device - fixes the complete mess that hangup caused - implements hopefully correct setldisc/close/hangup locking There are still some problems around pty pairs that have always been there but at least it is now possible to understand the code and fix further problems. This fixes the following known bugs - hang up can leak ldisc references - hang up may not call open/close on ldisc in a matched way - pty/tty pairs can deadlock during an ldisc change - reading the ldisc proc files can cause every ldisc to be loaded and probably a few other of the mysterious ldisc race reports. I'm sure it also adds the odd new one. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/bluetooth/hci_ldisc.c | 4 +- drivers/char/cyclades.c | 2 +- drivers/char/epca.c | 4 +- drivers/char/ip2/i2lib.c | 4 +- drivers/char/ip2/ip2main.c | 4 +- drivers/char/n_hdlc.c | 4 +- drivers/char/pty.c | 10 +- drivers/char/selection.c | 2 +- drivers/char/tty_io.c | 64 +----- drivers/char/tty_ldisc.c | 477 ++++++++++++++++++++++++++---------------- include/linux/tty.h | 9 +- 11 files changed, 330 insertions(+), 254 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index af761dc434f..68801512859 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty) /* FIXME: why is this needed. Note don't use ldisc_ref here as the open path is before the ldisc is referencable */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); return 0; diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 45601905174..f3366d3f06c 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -5200,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v) (cur_jifs - info->idle_stats.recv_idle)/ HZ, info->idle_stats.overruns, /* FIXME: double check locking */ - (long)info->port.tty->ldisc.ops->num); + (long)info->port.tty->ldisc->ops->num); else seq_printf(m, "%3d %8lu %10lu %8lu " "%10lu %8lu %9lu %6ld\n", diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 710ee933305..abef1f7d84f 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file, tty_wait_until_sent(tty, 0); } else { /* ldisc lock already held in ioctl */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); } unlock_kernel(); /* Fall Thru */ diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c index 0061e18aff6..0d10b89218e 100644 --- a/drivers/char/ip2/i2lib.c +++ b/drivers/char/ip2/i2lib.c @@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh) amountToMove = count; } // Move the first block - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, &(pCh->Ibuf[stripIndex]), NULL, amountToMove ); // If we needed to wrap, do the second data move if (count > amountToMove) { - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, pCh->Ibuf, NULL, count - amountToMove ); } // Bump and wrap the stripIndex all at once by the amount of data read. This diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index afd9247cf08..517271c762e 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -1315,8 +1315,8 @@ static inline void isig(int sig, struct tty_struct *tty, int flush) if (tty->pgrp) kill_pgrp(tty->pgrp, sig, 1); if (flush || !L_NOFLSH(tty)) { - if ( tty->ldisc.ops->flush_buffer ) - tty->ldisc.ops->flush_buffer(tty); + if ( tty->ldisc->ops->flush_buffer ) + tty->ldisc->ops->flush_buffer(tty); i2InputFlush( tty->driver_data ); } } diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index bacb3e2872a..461ece591a5 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty) #endif /* Flush any pending characters in the driver and discipline. */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); diff --git a/drivers/char/pty.c b/drivers/char/pty.c index da2cb8c70c1..5acd29e6e04 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -110,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, c = to->receive_room; if (c > count) c = count; - to->ldisc.ops->receive_buf(to, buf, NULL, c); + to->ldisc->ops->receive_buf(to, buf, NULL, c); return c; } @@ -148,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty) int count; /* We should get the line discipline lock for "tty->link" */ - if (!to || !to->ldisc.ops->chars_in_buffer) + if (!to || !to->ldisc->ops->chars_in_buffer) return 0; /* The ldisc must report 0 if no characters available to be read */ - count = to->ldisc.ops->chars_in_buffer(to); + count = to->ldisc->ops->chars_in_buffer(to); if (tty->driver->subtype == PTY_TYPE_SLAVE) return count; @@ -186,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty) if (!to) return; - if (to->ldisc.ops->flush_buffer) - to->ldisc.ops->flush_buffer(to); + if (to->ldisc->ops->flush_buffer) + to->ldisc->ops->flush_buffer(to); if (to->packet) { spin_lock_irqsave(&tty->ctrl_lock, flags); diff --git a/drivers/char/selection.c b/drivers/char/selection.c index cb8ca569896..f97b9e84806 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty) } count = sel_buffer_lth - pasted; count = min(count, tty->receive_room); - tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted, + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index be49d0730bb..2f44b0b241c 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -491,22 +491,6 @@ void tty_ldisc_flush(struct tty_struct *tty) EXPORT_SYMBOL_GPL(tty_ldisc_flush); -/** - * tty_reset_termios - reset terminal state - * @tty: tty to reset - * - * Restore a terminal to the driver default state - */ - -static void tty_reset_termios(struct tty_struct *tty) -{ - mutex_lock(&tty->termios_mutex); - *tty->termios = tty->driver->init_termios; - tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); - tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); - mutex_unlock(&tty->termios_mutex); -} - /** * do_tty_hangup - actual handler for hangup events * @work: tty device @@ -536,7 +520,6 @@ static void do_tty_hangup(struct work_struct *work) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; - struct tty_ldisc *ld; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -567,40 +550,8 @@ static void do_tty_hangup(struct work_struct *work) filp->f_op = &hung_up_tty_fops; } file_list_unlock(); - /* - * FIXME! What are the locking issues here? This may me overdoing - * things... This question is especially important now that we've - * removed the irqlock. - */ - ld = tty_ldisc_ref(tty); - if (ld != NULL) { - /* We may have no line discipline at this point */ - if (ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_driver_flush_buffer(tty); - if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && - ld->ops->write_wakeup) - ld->ops->write_wakeup(tty); - if (ld->ops->hangup) - ld->ops->hangup(tty); - } - /* - * FIXME: Once we trust the LDISC code better we can wait here for - * ldisc completion and fix the driver call race - */ - wake_up_interruptible_poll(&tty->write_wait, POLLOUT); - wake_up_interruptible_poll(&tty->read_wait, POLLIN); - /* - * Shutdown the current line discipline, and reset it to - * N_TTY. - */ - if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) - tty_reset_termios(tty); - /* Defer ldisc switch */ - /* tty_deferred_ldisc_switch(N_TTY); - This should get done automatically when the port closes and - tty_release is called */ + tty_ldisc_hangup(tty); read_lock(&tasklist_lock); if (tty->session) { @@ -629,12 +580,15 @@ static void do_tty_hangup(struct work_struct *work) read_unlock(&tasklist_lock); spin_lock_irqsave(&tty->ctrl_lock, flags); - tty->flags = 0; + clear_bit(TTY_THROTTLED, &tty->flags); + clear_bit(TTY_PUSH, &tty->flags); + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; tty->ctrl_status = 0; + set_bit(TTY_HUPPED, &tty->flags); spin_unlock_irqrestore(&tty->ctrl_lock, flags); /* Account for the p->signal references we killed */ @@ -660,10 +614,7 @@ static void do_tty_hangup(struct work_struct *work) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); - if (ld) { - tty_ldisc_enable(tty); - tty_ldisc_deref(ld); - } + tty_ldisc_enable(tty); unlock_kernel(); if (f) fput(f); @@ -2570,7 +2521,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - return put_user(tty->ldisc.ops->num, (int __user *)p); + return put_user(tty->ldisc->ops->num, (int __user *)p); case TIOCSETD: return tiocsetd(tty, p); /* @@ -2785,6 +2736,7 @@ void initialize_tty_struct(struct tty_struct *tty, tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); mutex_init(&tty->termios_mutex); + mutex_init(&tty->ldisc_mutex); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup); diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index e3c6416aa86..a58a19a6a5e 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc); /** * tty_ldisc_try_get - try and reference an ldisc * @disc: ldisc number - * @ld: tty ldisc structure to complete * * Attempt to open and lock a line discipline into place. Return - * the line discipline refcounted and assigned in ld. On an error - * report the error code back + * the line discipline refcounted or an error. */ -static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_try_get(int disc) { unsigned long flags; + struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; int err = -EINVAL; + ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL); + if (ld == NULL) + return ERR_PTR(-ENOMEM); + spin_lock_irqsave(&tty_ldisc_lock, flags); ld->ops = NULL; ldops = tty_ldiscs[disc]; @@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) /* lock it */ ldops->refcount++; ld->ops = ldops; + ld->refcount = 0; err = 0; } } spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return err; + if (err) + return ERR_PTR(err); + return ld; } /** * tty_ldisc_get - take a reference to an ldisc * @disc: ldisc number - * @ld: tty line discipline structure to use * * Takes a reference to a line discipline. Deals with refcounts and * module locking counts. Returns NULL if the discipline is not available. @@ -161,44 +166,46 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) * takes tty_ldisc_lock to guard against ldisc races */ -static int tty_ldisc_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_get(int disc) { - int err; + struct tty_ldisc *ld; if (disc < N_TTY || disc >= NR_LDISCS) - return -EINVAL; - err = tty_ldisc_try_get(disc, ld); - if (err < 0) { + return ERR_PTR(-EINVAL); + ld = tty_ldisc_try_get(disc); + if (IS_ERR(ld)) { request_module("tty-ldisc-%d", disc); - err = tty_ldisc_try_get(disc, ld); + ld = tty_ldisc_try_get(disc); } - return err; + return ld; } /** * tty_ldisc_put - drop ldisc reference - * @disc: ldisc number + * @ld: ldisc * * Drop a reference to a line discipline. Manage refcounts and - * module usage counts + * module usage counts. Free the ldisc once the recount hits zero. * * Locking: * takes tty_ldisc_lock to guard against ldisc races */ -static void tty_ldisc_put(struct tty_ldisc_ops *ld) +static void tty_ldisc_put(struct tty_ldisc *ld) { unsigned long flags; - int disc = ld->num; + int disc = ld->ops->num; + struct tty_ldisc_ops *ldo; BUG_ON(disc < N_TTY || disc >= NR_LDISCS); spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = tty_ldiscs[disc]; - BUG_ON(ld->refcount == 0); - ld->refcount--; - module_put(ld->owner); + ldo = tty_ldiscs[disc]; + BUG_ON(ldo->refcount == 0); + ldo->refcount--; + module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); + kfree(ld); } static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) @@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; - struct tty_ldisc ld; + struct tty_ldisc *ld; - if (tty_ldisc_get(i, &ld) < 0) + ld = tty_ldisc_try_get(i); + if (IS_ERR(ld)) return 0; - seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i); - tty_ldisc_put(ld.ops); + seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); + tty_ldisc_put(ld); return 0; } @@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = { static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) { - ld->refcount = 0; - tty->ldisc = *ld; + tty->ldisc = ld; } /** @@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty) int ret = 0; spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty->ldisc; + ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { ld->refcount++; ret = 1; @@ -315,8 +322,8 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc.refcount == 0); - return &tty->ldisc; + WARN_ON(tty->ldisc->refcount == 0); + return tty->ldisc; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -335,7 +342,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { if (tty_ldisc_try(tty)) - return &tty->ldisc; + return tty->ldisc; return NULL; } @@ -407,6 +414,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) mutex_unlock(&tty->termios_mutex); } +/** + * tty_ldisc_open - open a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to open + * + * A helper opening method. Also a convenient debugging and check + * point. + */ + +static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); + if (ld->ops->open) + return ld->ops->open(tty); + return 0; +} + +/** + * tty_ldisc_close - close a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to close + * + * A helper close method. Also a convenient debugging and check + * point. + */ + +static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); + clear_bit(TTY_LDISC_OPEN, &tty->flags); + if (ld->ops->close) + ld->ops->close(tty); +} /** * tty_ldisc_restore - helper for tty ldisc change @@ -420,31 +460,32 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { char buf[64]; - struct tty_ldisc new_ldisc; + struct tty_ldisc *new_ldisc; + int r; /* There is an outstanding reference here so this is safe */ - tty_ldisc_get(old->ops->num, old); + old = tty_ldisc_get(old->ops->num); + WARN_ON(IS_ERR(old)); tty_ldisc_assign(tty, old); tty_set_termios_ldisc(tty, old->ops->num); - if (old->ops->open && (old->ops->open(tty) < 0)) { - tty_ldisc_put(old->ops); + if (tty_ldisc_open(tty, old) < 0) { + tty_ldisc_put(old); /* This driver is always present */ - if (tty_ldisc_get(N_TTY, &new_ldisc) < 0) + new_ldisc =tty_ldisc_get(N_TTY); + if (IS_ERR(new_ldisc)) panic("n_tty: get"); - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, N_TTY); - if (new_ldisc.ops->open) { - int r = new_ldisc.ops->open(tty); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty, buf), r); - } + r = tty_ldisc_open(tty, new_ldisc); + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty, buf), r); } } /** - * tty_ldisc_halt - shutdown the line discipline + * tty_ldisc_halt - shut down the line discipline * @tty: tty device * * Shut down the line discipline and work queue for this tty device. @@ -456,14 +497,10 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) * tty_ldisc_wait_idle. */ -static void tty_ldisc_halt(struct tty_struct *tty) +static int tty_ldisc_halt(struct tty_struct *tty) { clear_bit(TTY_LDISC, &tty->flags); - cancel_delayed_work(&tty->buf.work); - /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - */ - flush_scheduled_work(); + return cancel_delayed_work(&tty->buf.work); } /** @@ -473,18 +510,22 @@ static void tty_ldisc_halt(struct tty_struct *tty) * Wait for the line discipline to become idle. The discipline must * have been halted for this to guarantee it remains idle. * + * tty_ldisc_lock protects the ref counts currently. */ -static void tty_ldisc_wait_idle(struct tty_struct *tty) +static int tty_ldisc_wait_idle(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc.refcount) { + while (tty->ldisc->refcount) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); + if (wait_event_timeout(tty_ldisc_wait, + tty->ldisc->refcount == 0, 5 * HZ) == 0) + return -EBUSY; spin_lock_irqsave(&tty_ldisc_lock, flags); } spin_unlock_irqrestore(&tty_ldisc_lock, flags); + return 0; } /** @@ -493,38 +534,63 @@ static void tty_ldisc_wait_idle(struct tty_struct *tty) * @ldisc: the line discipline * * Set the discipline of a tty line. Must be called from a process - * context. + * context. The ldisc change logic has to protect itself against any + * overlapping ldisc change (including on the other end of pty pairs), + * the close of one side of a tty/pty pair, and eventually hangup. * - * Locking: takes tty_ldisc_lock. - * called functions take termios_mutex + * Locking: takes tty_ldisc_lock, termios_mutex */ int tty_set_ldisc(struct tty_struct *tty, int ldisc) { int retval; - struct tty_ldisc o_ldisc, new_ldisc; - int work; - unsigned long flags; + struct tty_ldisc *o_ldisc, *new_ldisc; + int work, o_work = 0; struct tty_struct *o_tty; -restart: - /* This is a bit ugly for now but means we can break the 'ldisc - is part of the tty struct' assumption later */ - retval = tty_ldisc_get(ldisc, &new_ldisc); - if (retval) - return retval; + new_ldisc = tty_ldisc_get(ldisc); + if (IS_ERR(new_ldisc)) + return PTR_ERR(new_ldisc); /* - * Problem: What do we do if this blocks ? + * We need to look at the tty locking here for pty/tty pairs + * when both sides try to change in parallel. */ - tty_wait_until_sent(tty, 0); + o_tty = tty->link; /* o_tty is the pty side or NULL */ + - if (tty->ldisc.ops->num == ldisc) { - tty_ldisc_put(new_ldisc.ops); + /* + * Check the no-op case + */ + + if (tty->ldisc->ops->num == ldisc) { + tty_ldisc_put(new_ldisc); return 0; } + /* + * Problem: What do we do if this blocks ? + * We could deadlock here + */ + + tty_wait_until_sent(tty, 0); + + mutex_lock(&tty->ldisc_mutex); + + /* + * We could be midstream of another ldisc change which has + * dropped the lock during processing. If so we need to wait. + */ + + while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { + mutex_unlock(&tty->ldisc_mutex); + wait_event(tty_ldisc_wait, + test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0); + mutex_lock(&tty->ldisc_mutex); + } + set_bit(TTY_LDISC_CHANGING, &tty->flags); + /* * No more input please, we are switching. The new ldisc * will update this value in the ldisc open function @@ -533,8 +599,6 @@ restart: tty->receive_room = 0; o_ldisc = tty->ldisc; - o_tty = tty->link; - /* * Make sure we don't change while someone holds a * reference to the line discipline. The TTY_LDISC bit @@ -545,108 +609,181 @@ restart: * with a userspace app continually trying to use the tty in * parallel to the change and re-referencing the tty. */ - clear_bit(TTY_LDISC, &tty->flags); - if (o_tty) - clear_bit(TTY_LDISC, &o_tty->flags); - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { - if (tty->ldisc.refcount) { - /* Free the new ldisc we grabbed. Must drop the lock - first. */ - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_ldisc.ops); - /* - * There are several reasons we may be busy, including - * random momentary I/O traffic. We must therefore - * retry. We could distinguish between blocking ops - * and retries if we made tty_ldisc_wait() smarter. - * That is up for discussion. - */ - if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - if (o_tty && o_tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_tty->ldisc.ops); - if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - } - /* - * If the TTY_LDISC bit is set, then we are racing against - * another ldisc change - */ - if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { - struct tty_ldisc *ld; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(new_ldisc.ops); - ld = tty_ldisc_ref_wait(tty); - tty_ldisc_deref(ld); - goto restart; - } - /* - * This flag is used to avoid two parallel ldisc changes. Once - * open and close are fine grained locked this may work better - * as a mutex shared with the open/close/hup paths - */ - set_bit(TTY_LDISC_CHANGING, &tty->flags); + work = tty_ldisc_halt(tty); if (o_tty) - set_bit(TTY_LDISC_CHANGING, &o_tty->flags); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * From this point on we know nobody has an ldisc - * usage reference, nor can they obtain one until - * we say so later on. - */ + o_work = tty_ldisc_halt(o_tty); - work = cancel_delayed_work(&tty->buf.work); /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - * MUST NOT hold locks here. + * Wait for ->hangup_work and ->buf.work handlers to terminate. + * We must drop the mutex here in case a hangup is also in process. */ + + mutex_unlock(&tty->ldisc_mutex); + flush_scheduled_work(); + + /* Let any existing reference holders finish */ + retval = tty_ldisc_wait_idle(tty); + if (retval < 0) { + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + tty_ldisc_put(new_ldisc); + return retval; + } + + mutex_lock(&tty->ldisc_mutex); + if (test_bit(TTY_HUPPED, &tty->flags)) { + /* We were raced by the hangup method. It will have stomped + the ldisc data and closed the ldisc down */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + mutex_unlock(&tty->ldisc_mutex); + tty_ldisc_put(new_ldisc); + return -EIO; + } + /* Shutdown the current discipline. */ - if (o_ldisc.ops->close) - (o_ldisc.ops->close)(tty); + tty_ldisc_close(tty, o_ldisc); /* Now set up the new line discipline. */ - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, ldisc); - if (new_ldisc.ops->open) - retval = (new_ldisc.ops->open)(tty); + + retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { - tty_ldisc_put(new_ldisc.ops); - tty_ldisc_restore(tty, &o_ldisc); + /* Back to the old one or N_TTY if we can't */ + tty_ldisc_put(new_ldisc); + tty_ldisc_restore(tty, o_ldisc); } + /* At this point we hold a reference to the new ldisc and a a reference to the old ldisc. If we ended up flipping back to the existing ldisc we have two references to it */ - if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc) + if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc) tty->ops->set_ldisc(tty); - tty_ldisc_put(o_ldisc.ops); + tty_ldisc_put(o_ldisc); /* - * Allow ldisc referencing to occur as soon as the driver - * ldisc callback completes. + * Allow ldisc referencing to occur again */ tty_ldisc_enable(tty); if (o_tty) tty_ldisc_enable(o_tty); - /* Restart it in case no characters kick it off. Safe if + /* Restart the work queue in case no characters kick it off. Safe if already running */ if (work) schedule_delayed_work(&tty->buf.work, 1); + if (o_work) + schedule_delayed_work(&o_tty->buf.work, 1); + mutex_unlock(&tty->ldisc_mutex); return retval; } +/** + * tty_reset_termios - reset terminal state + * @tty: tty to reset + * + * Restore a terminal to the driver default state. + */ + +static void tty_reset_termios(struct tty_struct *tty) +{ + mutex_lock(&tty->termios_mutex); + *tty->termios = tty->driver->init_termios; + tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); + tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); + mutex_unlock(&tty->termios_mutex); +} + + +/** + * tty_ldisc_reinit - reinitialise the tty ldisc + * @tty: tty to reinit + * + * Switch the tty back to N_TTY line discipline and leave the + * ldisc state closed + */ + +static void tty_ldisc_reinit(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + tty_ldisc_close(tty, tty->ldisc); + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; + /* + * Switch the line discipline back + */ + ld = tty_ldisc_get(N_TTY); + BUG_ON(IS_ERR(ld)); + tty_ldisc_assign(tty, ld); + tty_set_termios_ldisc(tty, N_TTY); +} + +/** + * tty_ldisc_hangup - hangup ldisc reset + * @tty: tty being hung up + * + * Some tty devices reset their termios when they receive a hangup + * event. In that situation we must also switch back to N_TTY properly + * before we reset the termios data. + * + * Locking: We can take the ldisc mutex as the rest of the code is + * careful to allow for this. + * + * In the pty pair case this occurs in the close() path of the + * tty itself so we must be careful about locking rules. + */ + +void tty_ldisc_hangup(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + /* + * FIXME! What are the locking issues here? This may me overdoing + * things... This question is especially important now that we've + * removed the irqlock. + */ + ld = tty_ldisc_ref(tty); + if (ld != NULL) { + /* We may have no line discipline at this point */ + if (ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_driver_flush_buffer(tty); + if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && + ld->ops->write_wakeup) + ld->ops->write_wakeup(tty); + if (ld->ops->hangup) + ld->ops->hangup(tty); + tty_ldisc_deref(ld); + } + /* + * FIXME: Once we trust the LDISC code better we can wait here for + * ldisc completion and fix the driver call race + */ + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); + /* + * Shutdown the current line discipline, and reset it to + * N_TTY. + */ + if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { + /* Avoid racing set_ldisc */ + mutex_lock(&tty->ldisc_mutex); + /* Switch back to N_TTY */ + tty_ldisc_reinit(tty); + /* At this point we have a closed ldisc and we want to + reopen it. We could defer this to the next open but + it means auditing a lot of other paths so this is a FIXME */ + WARN_ON(tty_ldisc_open(tty, tty->ldisc)); + tty_ldisc_enable(tty); + mutex_unlock(&tty->ldisc_mutex); + tty_reset_termios(tty); + } +} /** * tty_ldisc_setup - open line discipline @@ -654,24 +791,23 @@ restart: * @o_tty: pair tty for pty/tty pairs * * Called during the initial open of a tty/pty pair in order to set up the - * line discplines and bind them to the tty. + * line disciplines and bind them to the tty. This has no locking issues + * as the device isn't yet active. */ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) { - struct tty_ldisc *ld = &tty->ldisc; + struct tty_ldisc *ld = tty->ldisc; int retval; - if (ld->ops->open) { - retval = (ld->ops->open)(tty); - if (retval) - return retval; - } - if (o_tty && o_tty->ldisc.ops->open) { - retval = (o_tty->ldisc.ops->open)(o_tty); + retval = tty_ldisc_open(tty, ld); + if (retval) + return retval; + + if (o_tty) { + retval = tty_ldisc_open(o_tty, o_tty->ldisc); if (retval) { - if (ld->ops->close) - (ld->ops->close)(tty); + tty_ldisc_close(tty, ld); return retval; } tty_ldisc_enable(o_tty); @@ -679,34 +815,18 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) tty_ldisc_enable(tty); return 0; } - -static void tty_ldisc_reinit(struct tty_struct *tty) -{ - struct tty_ldisc ld; - - if (tty->ldisc.ops->close) - (tty->ldisc.ops->close)(tty); - tty_ldisc_put(tty->ldisc.ops); - /* - * Switch the line discipline back - */ - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(tty, &ld); - tty_set_termios_ldisc(tty, N_TTY); -} - /** * tty_ldisc_release - release line discipline * @tty: tty being shut down * @o_tty: pair tty for pty/tty pairs * * Called during the final close of a tty/pty pair in order to shut down the - * line discpline layer. + * line discpline layer. On exit the ldisc assigned is N_TTY and the + * ldisc has not been opened. */ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) { - /* * Prevent flush_to_ldisc() from rescheduling the work for later. Then * kill any delayed work. As this is the final close it does not @@ -714,6 +834,7 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) */ tty_ldisc_halt(tty); + flush_scheduled_work(); /* * Wait for any short term users (we know they are just driver @@ -730,11 +851,9 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) */ tty_ldisc_reinit(tty); - if (o_tty) { - /* FIXME: could o_tty be in setldisc here ? */ - clear_bit(TTY_LDISC, &o_tty->flags); - tty_ldisc_reinit(o_tty); - } + /* This will need doing differently if we need to lock */ + if (o_tty) + tty_ldisc_release(o_tty, NULL); } /** @@ -747,10 +866,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) void tty_ldisc_init(struct tty_struct *tty) { - struct tty_ldisc ld; - if (tty_ldisc_get(N_TTY, &ld) < 0) + struct tty_ldisc *ld = tty_ldisc_get(N_TTY); + if (IS_ERR(ld)) panic("n_tty: init_tty"); - tty_ldisc_assign(tty, &ld); + tty_ldisc_assign(tty, ld); } void tty_ldisc_begin(void) diff --git a/include/linux/tty.h b/include/linux/tty.h index f9c13c83790..1488d8c81aa 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -226,8 +226,11 @@ struct tty_struct { struct tty_driver *driver; const struct tty_operations *ops; int index; - /* The ldisc objects are protected by tty_ldisc_lock at the moment */ - struct tty_ldisc ldisc; + + /* Protects ldisc changes: Lock tty not pty */ + struct mutex ldisc_mutex; + struct tty_ldisc *ldisc; + struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -314,6 +317,7 @@ struct tty_struct { #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ #define TTY_LDISC_CHANGING 10 /* Line discipline changing */ +#define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -406,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); +extern void tty_ldisc_hangup(struct tty_struct *tty); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); -- cgit v1.2.3-70-g09d2 From 08e0992f60ad44025a8a8b8a821838ca4a562686 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 Jun 2009 13:21:24 +0100 Subject: serial: add support for the TI AR7 internal UART This patch adds support for the TI AR7 internal UART. Signed-off-by: Florian Fainelli Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/8250.c | 7 +++++++ include/linux/serial_core.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index a0127e93ade..fb867a9f55e 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, + [PORT_AR7] = { + .name = "AR7", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, }; #if defined (CONFIG_SERIAL_8250_AU1X00) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 57a97e52e58..48766ea845c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -41,7 +41,8 @@ #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ -#define PORT_MAX_8250 17 /* max port ID */ +#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ +#define PORT_MAX_8250 18 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3-70-g09d2 From 34aec591847c696339189b070cce2a11f901cfea Mon Sep 17 00:00:00 2001 From: Richard Röjfors Date: Thu, 11 Jun 2009 14:05:39 +0100 Subject: serial: Added Timberdale UART driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver for the UART found in the Timberdale FPGA Signed-off-by: Richard Röjfors Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/Kconfig | 7 + drivers/serial/Makefile | 1 + drivers/serial/timbuart.c | 520 ++++++++++++++++++++++++++++++++++++++++++++ drivers/serial/timbuart.h | 58 +++++ include/linux/serial_core.h | 3 + 5 files changed, 589 insertions(+) create mode 100644 drivers/serial/timbuart.c create mode 100644 drivers/serial/timbuart.h (limited to 'include') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 343e3a35b6a..3391ef0d761 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1433,4 +1433,11 @@ config SPORT_BAUD_RATE default 19200 if (SERIAL_SPORT_BAUD_RATE_19200) default 9600 if (SERIAL_SPORT_BAUD_RATE_9600) +config SERIAL_TIMBERDALE + tristate "Support for timberdale UART" + depends on MFD_TIMBERDALE + select SERIAL_CORE + ---help--- + Add support for UART controller on timberdale. + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index d438eb2a73d..45a8658f54d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o obj-$(CONFIG_SERIAL_QE) += ucc_uart.o +obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c new file mode 100644 index 00000000000..30ba3c4cc18 --- /dev/null +++ b/drivers/serial/timbuart.c @@ -0,0 +1,520 @@ +/* + * timbuart.c timberdale FPGA UART driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#include +#include +#include +#include +#include +#include + +#include "timbuart.h" + +struct timbuart_port { + struct uart_port port; + struct tasklet_struct tasklet; + int usedma; + u8 last_ier; + struct platform_device *dev; +}; + +static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800, + 921600, 1843200, 3250000}; + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier); + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid); + +static void timbuart_stop_rx(struct uart_port *port) +{ + /* spin lock held by upper layer, disable all RX interrupts */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_stop_tx(struct uart_port *port) +{ + /* spinlock held by upper layer, disable TX interrupt */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_start_tx(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + /* do not transfer anything here -> fire off the tasklet */ + tasklet_schedule(&uart->tasklet); +} + +static void timbuart_flush_buffer(struct uart_port *port) +{ + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX; + + iowrite8(ctl, port->membase + TIMBUART_CTRL); + iowrite8(TXBF, port->membase + TIMBUART_ISR); +} + +static void timbuart_rx_chars(struct uart_port *port) +{ + struct tty_struct *tty = port->info->port.tty; + + while (ioread8(port->membase + TIMBUART_ISR) & RXDP) { + u8 ch = ioread8(port->membase + TIMBUART_RXFIFO); + port->icount.rx++; + tty_insert_flip_char(tty, ch, TTY_NORMAL); + } + + spin_unlock(&port->lock); + tty_flip_buffer_push(port->info->port.tty); + spin_lock(&port->lock); + + dev_dbg(port->dev, "%s - total read %d bytes\n", + __func__, port->icount.rx); +} + +static void timbuart_tx_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) && + !uart_circ_empty(xmit)) { + iowrite8(xmit->buf[xmit->tail], + port->membase + TIMBUART_TXFIFO); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + dev_dbg(port->dev, + "%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n", + __func__, + port->icount.tx, + ioread8(port->membase + TIMBUART_CTRL), + port->mctrl & TIOCM_RTS, + ioread8(port->membase + TIMBUART_BAUDRATE)); +} + +static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + struct circ_buf *xmit = &port->info->xmit; + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return; + + if (port->x_char) + return; + + if (isr & TXFLAGS) { + timbuart_tx_chars(port); + /* clear all TX interrupts */ + iowrite8(TXFLAGS, port->membase + TIMBUART_ISR); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + } else + /* Re-enable any tx interrupt */ + *ier |= uart->last_ier & TXFLAGS; + + /* enable interrupts if there are chars in the transmit buffer, + * Or if we delivered some bytes and want the almost empty interrupt + * we wake up the upper layer later when we got the interrupt + * to give it some time to go out... + */ + if (!uart_circ_empty(xmit)) + *ier |= TXBAE; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + if (isr & RXFLAGS) { + /* Some RX status is set */ + if (isr & RXBF) { + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | + TIMBUART_CTRL_FLSHRX; + iowrite8(ctl, port->membase + TIMBUART_CTRL); + port->icount.overrun++; + } else if (isr & (RXDP)) + timbuart_rx_chars(port); + + /* ack all RX interrupts */ + iowrite8(RXFLAGS, port->membase + TIMBUART_ISR); + } + + /* always have the RX interrupts enabled */ + *ier |= RXBAF | RXBF | RXTT; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_tasklet(unsigned long arg) +{ + struct timbuart_port *uart = (struct timbuart_port *)arg; + u8 isr, ier = 0; + + spin_lock(&uart->port.lock); + + isr = ioread8(uart->port.membase + TIMBUART_ISR); + dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr); + + if (!uart->usedma) + timbuart_handle_tx_port(&uart->port, isr, &ier); + + timbuart_mctrl_check(&uart->port, isr, &ier); + + if (!uart->usedma) + timbuart_handle_rx_port(&uart->port, isr, &ier); + + iowrite8(ier, uart->port.membase + TIMBUART_IER); + + spin_unlock(&uart->port.lock); + dev_dbg(uart->port.dev, "%s leaving\n", __func__); +} + +static unsigned int timbuart_tx_empty(struct uart_port *port) +{ + u8 isr = ioread8(port->membase + TIMBUART_ISR); + + return (isr & TXBAE) ? TIOCSER_TEMT : 0; +} + +static unsigned int timbuart_get_mctrl(struct uart_port *port) +{ + u8 cts = ioread8(port->membase + TIMBUART_CTRL); + dev_dbg(port->dev, "%s - cts %x\n", __func__, cts); + + if (cts & TIMBUART_CTRL_CTS) + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; + else + return TIOCM_DSR | TIOCM_CAR; +} + +static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + dev_dbg(port->dev, "%s - %x\n", __func__, mctrl); + + if (mctrl & TIOCM_RTS) + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); + else + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); +} + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier) +{ + unsigned int cts; + + if (isr & CTS_DELTA) { + /* ack */ + iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR); + cts = timbuart_get_mctrl(port); + uart_handle_cts_change(port, cts & TIOCM_CTS); + wake_up_interruptible(&port->info->delta_msr_wait); + } + + *ier |= CTS_DELTA; +} + +static void timbuart_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +static void timbuart_break_ctl(struct uart_port *port, int ctl) +{ + /* N/A */ +} + +static int timbuart_startup(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + dev_dbg(port->dev, "%s\n", __func__); + + iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL); + iowrite8(0xff, port->membase + TIMBUART_ISR); + /* Enable all but TX interrupts */ + iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA, + port->membase + TIMBUART_IER); + + return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED, + "timb-uart", uart); +} + +static void timbuart_shutdown(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + dev_dbg(port->dev, "%s\n", __func__); + free_irq(port->irq, uart); + iowrite8(0, port->membase + TIMBUART_IER); +} + +static int get_bindex(int baud) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(baudrates); i++) + if (baud == baudrates[i]) + return i; + + return -1; +} + +static void timbuart_set_termios(struct uart_port *port, + struct ktermios *termios, + struct ktermios *old) +{ + unsigned int baud; + short bindex; + unsigned long flags; + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16); + bindex = get_bindex(baud); + dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex); + + if (bindex < 0) { + printk(KERN_ALERT "timbuart: Unsupported baud rate\n"); + } else { + spin_lock_irqsave(&port->lock, flags); + iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE); + uart_update_timeout(port, termios->c_cflag, baud); + spin_unlock_irqrestore(&port->lock, flags); + } +} + +static const char *timbuart_type(struct uart_port *port) +{ + return port->type == PORT_UNKNOWN ? "timbuart" : NULL; +} + +/* We do not request/release mappings of the registers here, + * currently it's done in the proble function. + */ +static void timbuart_release_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (port->flags & UPF_IOREMAP) { + iounmap(port->membase); + port->membase = NULL; + } + + release_mem_region(port->mapbase, size); +} + +static int timbuart_request_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (!request_mem_region(port->mapbase, size, "timb-uart")) + return -EBUSY; + + if (port->flags & UPF_IOREMAP) { + port->membase = ioremap(port->mapbase, size); + if (port->membase == NULL) { + release_mem_region(port->mapbase, size); + return -ENOMEM; + } + } + + return 0; +} + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid) +{ + struct timbuart_port *uart = (struct timbuart_port *)devid; + + if (ioread8(uart->port.membase + TIMBUART_IPR)) { + uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER); + + /* disable interrupts, the tasklet enables them again */ + iowrite8(0, uart->port.membase + TIMBUART_IER); + + /* fire off bottom half */ + tasklet_schedule(&uart->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +/* + * Configure/autoconfigure the port. + */ +static void timbuart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_TIMBUART; + timbuart_request_port(port); + } +} + +static int timbuart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + /* we don't want the core code to modify any port params */ + return -EINVAL; +} + +static struct uart_ops timbuart_ops = { + .tx_empty = timbuart_tx_empty, + .set_mctrl = timbuart_set_mctrl, + .get_mctrl = timbuart_get_mctrl, + .stop_tx = timbuart_stop_tx, + .start_tx = timbuart_start_tx, + .flush_buffer = timbuart_flush_buffer, + .stop_rx = timbuart_stop_rx, + .enable_ms = timbuart_enable_ms, + .break_ctl = timbuart_break_ctl, + .startup = timbuart_startup, + .shutdown = timbuart_shutdown, + .set_termios = timbuart_set_termios, + .type = timbuart_type, + .release_port = timbuart_release_port, + .request_port = timbuart_request_port, + .config_port = timbuart_config_port, + .verify_port = timbuart_verify_port +}; + +static struct uart_driver timbuart_driver = { + .owner = THIS_MODULE, + .driver_name = "timberdale_uart", + .dev_name = "ttyTU", + .major = TIMBUART_MAJOR, + .minor = TIMBUART_MINOR, + .nr = 1 +}; + +static int timbuart_probe(struct platform_device *dev) +{ + int err; + struct timbuart_port *uart; + struct resource *iomem; + + dev_dbg(&dev->dev, "%s\n", __func__); + + uart = kzalloc(sizeof(*uart), GFP_KERNEL); + if (!uart) { + err = -EINVAL; + goto err_mem; + } + + uart->usedma = 0; + + uart->port.uartclk = 3250000 * 16; + uart->port.fifosize = TIMBUART_FIFO_SIZE; + uart->port.regshift = 2; + uart->port.iotype = UPIO_MEM; + uart->port.ops = &timbuart_ops; + uart->port.irq = 0; + uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP; + uart->port.line = 0; + uart->port.dev = &dev->dev; + + iomem = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!iomem) { + err = -ENOMEM; + goto err_register; + } + uart->port.mapbase = iomem->start; + uart->port.membase = NULL; + + uart->port.irq = platform_get_irq(dev, 0); + if (uart->port.irq < 0) { + err = -EINVAL; + goto err_register; + } + + tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart); + + err = uart_register_driver(&timbuart_driver); + if (err) + goto err_register; + + err = uart_add_one_port(&timbuart_driver, &uart->port); + if (err) + goto err_add_port; + + platform_set_drvdata(dev, uart); + + return 0; + +err_add_port: + uart_unregister_driver(&timbuart_driver); +err_register: + kfree(uart); +err_mem: + printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n", + err); + + return err; +} + +static int timbuart_remove(struct platform_device *dev) +{ + struct timbuart_port *uart = platform_get_drvdata(dev); + + tasklet_kill(&uart->tasklet); + uart_remove_one_port(&timbuart_driver, &uart->port); + uart_unregister_driver(&timbuart_driver); + kfree(uart); + + return 0; +} + +static struct platform_driver timbuart_platform_driver = { + .driver = { + .name = "timb-uart", + .owner = THIS_MODULE, + }, + .probe = timbuart_probe, + .remove = timbuart_remove, +}; + +/*--------------------------------------------------------------------------*/ + +static int __init timbuart_init(void) +{ + return platform_driver_register(&timbuart_platform_driver); +} + +static void __exit timbuart_exit(void) +{ + platform_driver_unregister(&timbuart_platform_driver); +} + +module_init(timbuart_init); +module_exit(timbuart_exit); + +MODULE_DESCRIPTION("Timberdale UART driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:timb-uart"); + diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h new file mode 100644 index 00000000000..7e566766bc4 --- /dev/null +++ b/drivers/serial/timbuart.h @@ -0,0 +1,58 @@ +/* + * timbuart.c timberdale FPGA GPIO driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#ifndef _TIMBUART_H +#define _TIMBUART_H + +#define TIMBUART_FIFO_SIZE 2048 + +#define TIMBUART_RXFIFO 0x08 +#define TIMBUART_TXFIFO 0x0c +#define TIMBUART_IER 0x10 +#define TIMBUART_IPR 0x14 +#define TIMBUART_ISR 0x18 +#define TIMBUART_CTRL 0x1c +#define TIMBUART_BAUDRATE 0x20 + +#define TIMBUART_CTRL_RTS 0x01 +#define TIMBUART_CTRL_CTS 0x02 +#define TIMBUART_CTRL_FLSHTX 0x40 +#define TIMBUART_CTRL_FLSHRX 0x80 + +#define TXBF 0x01 +#define TXBAE 0x02 +#define CTS_DELTA 0x04 +#define RXDP 0x08 +#define RXBAF 0x10 +#define RXBF 0x20 +#define RXTT 0x40 +#define RXBNAE 0x80 +#define TXBE 0x100 + +#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE) +#define TXFLAGS (TXBF | TXBAE) + +#define TIMBUART_MAJOR 204 +#define TIMBUART_MINOR 192 + +#endif /* _TIMBUART_H */ + diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 48766ea845c..6fd80c4243f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -168,6 +168,9 @@ /* MAX3100 */ #define PORT_MAX3100 86 +/* Timberdale UART */ +#define PORT_TIMBUART 87 + #ifdef __KERNEL__ #include -- cgit v1.2.3-70-g09d2 From 8759ef32d992fc6c0bcbe40fca7aa302190918a5 Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Thu, 11 Jun 2009 14:51:15 +0100 Subject: lib: isolate rational fractions helper function Provide a helper function to determine optimum numerator denominator value pairs taking into account restricted register size. Useful especially with PLL and other clock configurations. Signed-off-by: Oskar Schirmer Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/rational.h | 19 +++++++++++++++ lib/Kconfig | 3 +++ lib/Makefile | 1 + lib/rational.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+) create mode 100644 include/linux/rational.h create mode 100644 lib/rational.c (limited to 'include') diff --git a/include/linux/rational.h b/include/linux/rational.h new file mode 100644 index 00000000000..4f532fcd9ee --- /dev/null +++ b/include/linux/rational.h @@ -0,0 +1,19 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * + * helper functions when coping with rational numbers, + * e.g. when calculating optimum numerator/denominator pairs for + * pll configuration taking into account restricted register size + */ + +#ifndef _LINUX_RATIONAL_H +#define _LINUX_RATIONAL_H + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator); + +#endif /* _LINUX_RATIONAL_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 8ade0a7a91e..9960be04cbb 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -10,6 +10,9 @@ menu "Library routines" config BITREVERSE tristate +config RATIONAL + boolean + config GENERIC_FIND_FIRST_BIT bool diff --git a/lib/Makefile b/lib/Makefile index 33a40e40e3e..1f6edefebff 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y) endif obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o diff --git a/lib/rational.c b/lib/rational.c new file mode 100644 index 00000000000..b3c099b5478 --- /dev/null +++ b/lib/rational.c @@ -0,0 +1,62 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * + * helper functions when coping with rational numbers + */ + +#include + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); -- cgit v1.2.3-70-g09d2 From 1c432d899d32d36371ee4ee310fa3609cf0e5742 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 13:19:29 +0200 Subject: perf_counter: Rename enums Rename the perf enums to be in the 'perf_' namespace and strictly enumerate the ABI bits. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 53 +++++++++++++++++++++----------------------- kernel/perf_counter.c | 6 ++--- 2 files changed, 28 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 95c797c480e..d5911b02bc8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -24,24 +24,21 @@ /* * attr.type */ -enum perf_event_types { +enum perf_type_id { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, - /* - * available TYPE space, raw is the max value. - */ - - PERF_TYPE_RAW = 128, + PERF_TYPE_MAX, /* non ABI */ }; /* * Generalized performance counter event types, used by the attr.event_id * parameter of the sys_perf_counter_open() syscall: */ -enum attr_ids { +enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ @@ -53,7 +50,7 @@ enum attr_ids { PERF_COUNT_BRANCH_MISSES = 5, PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 7, + PERF_HW_EVENTS_MAX, /* non ABI */ }; /* @@ -63,30 +60,30 @@ enum attr_ids { * { read, write, prefetch } x * { accesses, misses } */ -enum hw_cache_id { - PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_L1I, - PERF_COUNT_HW_CACHE_L2, - PERF_COUNT_HW_CACHE_DTLB, - PERF_COUNT_HW_CACHE_ITLB, - PERF_COUNT_HW_CACHE_BPU, - - PERF_COUNT_HW_CACHE_MAX, +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_L2 = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non ABI */ }; -enum hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ, - PERF_COUNT_HW_CACHE_OP_WRITE, - PERF_COUNT_HW_CACHE_OP_PREFETCH, +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - PERF_COUNT_HW_CACHE_OP_MAX, + PERF_COUNT_HW_CACHE_OP_MAX, /* non ABI */ }; -enum hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS, - PERF_COUNT_HW_CACHE_RESULT_MISS, +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - PERF_COUNT_HW_CACHE_RESULT_MAX, + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non ABI */ }; /* @@ -95,7 +92,7 @@ enum hw_cache_op_result_id { * physical and sw events of the kernel (and allow the profiling of them as * well): */ -enum sw_event_ids { +enum perf_sw_ids { PERF_COUNT_CPU_CLOCK = 0, PERF_COUNT_TASK_CLOCK = 1, PERF_COUNT_PAGE_FAULTS = 2, @@ -104,7 +101,7 @@ enum sw_event_ids { PERF_COUNT_PAGE_FAULTS_MIN = 5, PERF_COUNT_PAGE_FAULTS_MAJ = 6, - PERF_SW_EVENTS_MAX = 7, + PERF_SW_EVENTS_MAX, /* non ABI */ }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3b2829de559..c02535bed26 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3162,7 +3162,7 @@ static int perf_swcounter_is_counting(struct perf_counter *counter) } static int perf_swcounter_match(struct perf_counter *counter, - enum perf_event_types type, + enum perf_type_id type, u32 event, struct pt_regs *regs) { if (!perf_swcounter_is_counting(counter)) @@ -3194,7 +3194,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum perf_event_types type, u32 event, + enum perf_type_id type, u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { @@ -3225,7 +3225,7 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) return &cpuctx->recursion[0]; } -static void __perf_swcounter_event(enum perf_event_types type, u32 event, +static void __perf_swcounter_event(enum perf_type_id type, u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { -- cgit v1.2.3-70-g09d2 From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:06:28 +0200 Subject: perf_counter: Standardize event names Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power4-pmu.c | 12 +++++------ arch/powerpc/kernel/power5+-pmu.c | 12 +++++------ arch/powerpc/kernel/power5-pmu.c | 12 +++++------ arch/powerpc/kernel/power6-pmu.c | 12 +++++------ arch/powerpc/kernel/ppc970-pmu.c | 12 +++++------ arch/powerpc/mm/fault.c | 6 +++--- arch/x86/kernel/cpu/perf_counter.c | 32 +++++++++++++-------------- arch/x86/mm/fault.c | 6 +++--- include/linux/perf_counter.h | 36 +++++++++++++++---------------- kernel/perf_counter.c | 20 ++++++++--------- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-stat.c | 31 ++++++++++++++------------- tools/perf/builtin-top.c | 4 ++-- tools/perf/design.txt | 28 ++++++++++++------------ tools/perf/util/parse-events.c | 44 +++++++++++++++++++------------------- 15 files changed, 136 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 0e94b685722..73956f084b2 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -535,12 +535,12 @@ static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int p4_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 7, - [PERF_COUNT_INSTRUCTIONS] = 0x1001, - [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 7, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index bbf2cbb0738..5f8b7741e97 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -606,12 +606,12 @@ static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power5p_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0xf, - [PERF_COUNT_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 0xf, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 670cf10b91e..d54723ab627 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -548,12 +548,12 @@ static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power5_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0xf, - [PERF_COUNT_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 0xf, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 4da70786609..0cd406ee765 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -466,12 +466,12 @@ static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power6_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0x1e, - [PERF_COUNT_INSTRUCTIONS] = 2, - [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ - [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 336adf1736a..46a20640942 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -419,12 +419,12 @@ static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int ppc970_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 7, - [PERF_COUNT_INSTRUCTIONS] = 1, - [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = 7, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ac0e112031b..5beffc8f481 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -323,7 +323,7 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 57ae1bec81b..572fb434a66 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -69,13 +69,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { */ static const u64 intel_perfmon_event_map[] = { - [PERF_COUNT_CPU_CYCLES] = 0x003c, - [PERF_COUNT_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_CACHE_MISSES] = 0x412e, - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_BUS_CYCLES] = 0x013c, + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; static u64 intel_pmu_event_map(int event) @@ -485,12 +485,12 @@ static const u64 amd_0f_hw_cache_event_ids */ static const u64 amd_perfmon_event_map[] = { - [PERF_COUNT_CPU_CYCLES] = 0x0076, - [PERF_COUNT_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_CACHE_MISSES] = 0x0081, - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, }; static u64 amd_pmu_event_map(int event) @@ -970,11 +970,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) return X86_PMC_IDX_FIXED_BUS_CYCLES; return -1; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6f9df2babe4..5c6d816f30b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1142,11 +1142,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d5911b02bc8..887df88a9c2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -42,15 +42,15 @@ enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - - PERF_HW_EVENTS_MAX, /* non ABI */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + + PERF_COUNT_HW_MAX, /* non ABI */ }; /* @@ -93,15 +93,15 @@ enum perf_hw_cache_op_result_id { * well): */ enum perf_sw_ids { - PERF_COUNT_CPU_CLOCK = 0, - PERF_COUNT_TASK_CLOCK = 1, - PERF_COUNT_PAGE_FAULTS = 2, - PERF_COUNT_CONTEXT_SWITCHES = 3, - PERF_COUNT_CPU_MIGRATIONS = 4, - PERF_COUNT_PAGE_FAULTS_MIN = 5, - PERF_COUNT_PAGE_FAULTS_MAJ = 6, - - PERF_SW_EVENTS_MAX, /* non ABI */ + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non ABI */ }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c02535bed26..8859b97390e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1024,7 +1024,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int do_switch = 1; regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); + perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); if (likely(!ctx || !cpuctx->task_ctx)) return; @@ -3411,13 +3411,13 @@ void perf_counter_task_migration(struct task_struct *task, int cpu) struct perf_counter_context *ctx; perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_CPU_MIGRATIONS, + PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); ctx = perf_pin_task_context(task); if (ctx) { perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_CPU_MIGRATIONS, + PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); perf_unpin_context(ctx); } @@ -3475,11 +3475,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * events. */ switch (counter->attr.config) { - case PERF_COUNT_CPU_CLOCK: + case PERF_COUNT_SW_CPU_CLOCK: pmu = &perf_ops_cpu_clock; break; - case PERF_COUNT_TASK_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: /* * If the user instantiates this as a per-cpu counter, * use the cpu_clock counter instead. @@ -3490,11 +3490,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) pmu = &perf_ops_cpu_clock; break; - case PERF_COUNT_PAGE_FAULTS: - case PERF_COUNT_PAGE_FAULTS_MIN: - case PERF_COUNT_PAGE_FAULTS_MAJ: - case PERF_COUNT_CONTEXT_SWITCHES: - case PERF_COUNT_CPU_MIGRATIONS: + case PERF_COUNT_SW_PAGE_FAULTS: + case PERF_COUNT_SW_PAGE_FAULTS_MIN: + case PERF_COUNT_SW_PAGE_FAULTS_MAJ: + case PERF_COUNT_SW_CONTEXT_SWITCHES: + case PERF_COUNT_SW_CPU_MIGRATIONS: pmu = &perf_ops_generic; break; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 84cd336ae79..29259e74dcf 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -378,12 +378,12 @@ try_again: * is always available even if no PMU support: */ if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_CPU_CYCLES) { + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) warning(" ... trying to fall back to cpu-clock-ticks\n"); attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_CPU_CLOCK; + attr->config = PERF_COUNT_SW_CPU_CLOCK; goto try_again; } printf("\n"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6404906924f..c43e4a97dc4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,15 +46,16 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + }; static int system_wide = 0; @@ -120,10 +121,10 @@ static inline int nsec_counter(int counter) if (attrs[counter].type != PERF_TYPE_SOFTWARE) return 0; - if (attrs[counter].config == PERF_COUNT_CPU_CLOCK) + if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) return 1; - if (attrs[counter].config == PERF_COUNT_TASK_CLOCK) + if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) return 1; return 0; @@ -176,10 +177,10 @@ static void read_counter(int counter) * Save the full runtime - to allow normalization during printout: */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_TASK_CLOCK) + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) runtime_nsecs = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_CPU_CYCLES) + attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) runtime_cycles = count[0]; } @@ -206,7 +207,7 @@ static void print_counter(int counter) fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_TASK_CLOCK) { + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { if (walltime_nsecs) fprintf(stderr, " # %11.3f CPU utilization factor", @@ -220,7 +221,7 @@ static void print_counter(int counter) (double)count[0]/runtime_nsecs*1000.0); if (runtime_cycles && attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_INSTRUCTIONS) { + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { fprintf(stderr, " # %1.3f per cycle", (double)count[0] / (double)runtime_cycles); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 309dbc76ec8..fe338d3c5d7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -562,13 +562,13 @@ try_again: * is always available even if no PMU support: */ if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_CPU_CYCLES) { + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) warning(" ... trying to fall back to cpu-clock-ticks\n"); attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_CPU_CLOCK; + attr->config = PERF_COUNT_SW_CPU_CLOCK; goto try_again; } printf("\n"); diff --git a/tools/perf/design.txt b/tools/perf/design.txt index d3250763dc9..860e116d979 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -99,13 +99,13 @@ enum hw_event_ids { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, }; These are standardized types of events that work relatively uniformly @@ -130,13 +130,13 @@ software events, selected by 'event_id': * well): */ enum sw_event_ids { - PERF_COUNT_CPU_CLOCK = 0, - PERF_COUNT_TASK_CLOCK = 1, - PERF_COUNT_PAGE_FAULTS = 2, - PERF_COUNT_CONTEXT_SWITCHES = 3, - PERF_COUNT_CPU_MIGRATIONS = 4, - PERF_COUNT_PAGE_FAULTS_MIN = 5, - PERF_COUNT_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, }; Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f18a9a006e1..9d5f1ca50e6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -22,26 +22,26 @@ struct event_symbol { #define CR(x, y) .type = PERF_TYPE_##x, .config = y static struct event_symbol event_symbols[] = { - { C(HARDWARE, CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, CPU_CYCLES), "cycles", }, - { C(HARDWARE, INSTRUCTIONS), "instructions", }, - { C(HARDWARE, CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, BRANCH_INSTRUCTIONS), "branch-instructions", }, - { C(HARDWARE, BRANCH_INSTRUCTIONS), "branches", }, - { C(HARDWARE, BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, PAGE_FAULTS), "faults", }, - { C(SOFTWARE, PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, CPU_MIGRATIONS), "migrations", }, + { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, + { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, + { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, + { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, + { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, + { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, + { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, + { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, + { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, + + { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, + { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, + { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, + { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, + { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, + { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, + { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, + { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, + { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, + { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -107,7 +107,7 @@ char *event_name(int counter) switch (type) { case PERF_TYPE_HARDWARE: - if (config < PERF_HW_EVENTS_MAX) + if (config < PERF_COUNT_HW_MAX) return hw_event_names[config]; return "unknown-hardware"; @@ -136,7 +136,7 @@ char *event_name(int counter) } case PERF_TYPE_SOFTWARE: - if (config < PERF_SW_EVENTS_MAX) + if (config < PERF_COUNT_SW_MAX) return sw_event_names[config]; return "unknown-software"; -- cgit v1.2.3-70-g09d2 From 8be6e8f3c3a13900169f1141870562d0c723b010 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:19:11 +0200 Subject: perf_counter: Rename L2 to LL cache The top (fastest) and last level (biggest) caches are the most interesting ones, performance wise. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: [ Fixed the Nehalem LL table to LLC Reference/Miss events ] Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/x86/kernel/cpu/perf_counter.c | 12 ++++++------ include/linux/perf_counter.h | 4 ++-- 8 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 73956f084b2..07bd308a5fa 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -561,7 +561,7 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc34, 0 }, diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 5f8b7741e97..41e5d2d958d 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -632,7 +632,7 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc50c3, 0 }, diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index d54723ab627..05600b66221 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -574,7 +574,7 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x3c309b }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc50c3, 0 }, diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 0cd406ee765..46f74bebcfd 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -493,7 +493,7 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0x4008c, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x150730, 0x250532 }, [C(OP_WRITE)] = { 0x250432, 0x150432 }, [C(OP_PREFETCH)] = { 0x810a6, 0 }, diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 060e0deb399..b3f7d1216ba 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -320,7 +320,7 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0x408a, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x6080, 0x6084 }, [C(OP_WRITE)] = { 0x6082, 0x6086 }, [C(OP_PREFETCH)] = { 0, 0 }, diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 46a20640942..ba0a357a89f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -445,7 +445,7 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0x733, 0 }, diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 572fb434a66..895c82e7845 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -131,7 +131,7 @@ static const u64 nehalem_hw_cache_event_ids [ C(RESULT_MISS) ] = 0x0, }, }, - [ C(L2 ) ] = { + [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ @@ -141,8 +141,8 @@ static const u64 nehalem_hw_cache_event_ids [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES */ - [ C(RESULT_MISS) ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS */ + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ }, }, [ C(DTLB) ] = { @@ -222,7 +222,7 @@ static const u64 core2_hw_cache_event_ids [ C(RESULT_MISS) ] = 0, }, }, - [ C(L2 ) ] = { + [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ @@ -313,7 +313,7 @@ static const u64 atom_hw_cache_event_ids [ C(RESULT_MISS) ] = 0, }, }, - [ C(L2 ) ] = { + [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ @@ -422,7 +422,7 @@ static const u64 amd_0f_hw_cache_event_ids [ C(RESULT_MISS) ] = 0, }, }, - [ C(L2 ) ] = { + [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, [ C(RESULT_MISS) ] = 0, diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 887df88a9c2..20cf5af27ad 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -56,14 +56,14 @@ enum perf_hw_id { /* * Generalized hardware cache counters: * - * { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x * { read, write, prefetch } x * { accesses, misses } */ enum perf_hw_cache_id { PERF_COUNT_HW_CACHE_L1D = 0, PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_L2 = 2, + PERF_COUNT_HW_CACHE_LL = 2, PERF_COUNT_HW_CACHE_DTLB = 3, PERF_COUNT_HW_CACHE_ITLB = 4, PERF_COUNT_HW_CACHE_BPU = 5, -- cgit v1.2.3-70-g09d2 From a308444ceb576d3089f9ca0dfd097eba6f1e623f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Jun 2009 14:44:26 +0200 Subject: perf_counter: Better align code Whitespace and comment bits. Also update copyrights. [ Impact: cleanup ] Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: --- include/linux/perf_counter.h | 165 ++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 20cf5af27ad..1fa1a26cb1b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -1,12 +1,13 @@ /* * Performance counters: * - * Copyright(C) 2008, Thomas Gleixner - * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * - * Started by: Thomas Gleixner and Ingo Molnar + * Started by: Thomas Gleixner and Ingo Molnar * * For licencing details see kernel-base/COPYING */ @@ -25,18 +26,19 @@ * attr.type */ enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, - PERF_TYPE_MAX, /* non ABI */ + PERF_TYPE_MAX, /* non-ABI */ }; /* - * Generalized performance counter event types, used by the attr.event_id - * parameter of the sys_perf_counter_open() syscall: + * Generalized performance counter event types, used by the + * attr.event_id parameter of the sys_perf_counter_open() + * syscall: */ enum perf_hw_id { /* @@ -50,7 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_MAX, /* non ABI */ + PERF_COUNT_HW_MAX, /* non-ABI */ }; /* @@ -61,29 +63,29 @@ enum perf_hw_id { * { accesses, misses } */ enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ }; enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - PERF_COUNT_HW_CACHE_OP_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ }; enum perf_hw_cache_op_result_id { PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non ABI */ + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ }; /* @@ -93,15 +95,15 @@ enum perf_hw_cache_op_result_id { * well): */ enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - - PERF_COUNT_SW_MAX, /* non ABI */ + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non-ABI */ }; /* @@ -109,15 +111,15 @@ enum perf_sw_ids { * in the overflow packets. */ enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, }; /* @@ -126,9 +128,9 @@ enum perf_counter_sample_format { * in increasing order of bit value, after the counter value. */ enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, }; /* @@ -229,12 +231,12 @@ struct perf_counter_mmap_page { __u64 data_head; /* head in the data section */ }; -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) +#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) +#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (2 << 0) +#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -351,14 +353,14 @@ struct hw_perf_counter { #ifdef CONFIG_PERF_COUNTERS union { struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; + u64 config; + unsigned long config_base; + unsigned long counter_base; + int idx; }; union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; + atomic64_t count; + struct hrtimer hrtimer; }; }; atomic64_t prev_count; @@ -523,37 +525,37 @@ struct perf_counter_context { * Protect the states of the counters in the list, * nr_active, and the list: */ - spinlock_t lock; + spinlock_t lock; /* * Protect the list of counters. Locking either mutex or lock * is sufficient to ensure the list doesn't change; to change * the list you need to lock both the mutex and the spinlock. */ - struct mutex mutex; + struct mutex mutex; - struct list_head counter_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - atomic_t refcount; - struct task_struct *task; + struct list_head counter_list; + struct list_head event_list; + int nr_counters; + int nr_active; + int is_active; + atomic_t refcount; + struct task_struct *task; /* * Context clock, runs when context enabled. */ - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; /* * These fields let us detect when two contexts have both * been cloned (inherited) from a common ancestor. */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; + struct perf_counter_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; }; /** @@ -604,9 +606,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); struct perf_sample_data { - struct pt_regs *regs; - u64 addr; - u64 period; + struct pt_regs *regs; + u64 addr; + u64 period; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, @@ -636,11 +638,14 @@ extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); -#define MAX_STACK_DEPTH 255 +#define MAX_STACK_DEPTH 255 struct perf_callchain_entry { - u16 nr, hv, kernel, user; - u64 ip[MAX_STACK_DEPTH]; + u16 nr; + u16 hv; + u16 kernel; + u16 user; + u64 ip[MAX_STACK_DEPTH]; }; extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); -- cgit v1.2.3-70-g09d2 From cca3f454a85ff42d426401bce7ac804541b2bd03 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:57:55 +0200 Subject: perf_counter: Add counter->id to the throttle event So as to be able to distuinguish between multiple counters. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + kernel/perf_counter.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1fa1a26cb1b..6e133954e2e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -286,6 +286,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u64 time; + * u64 id; * }; */ PERF_EVENT_THROTTLE = 5, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8859b97390e..ef5d8a5b245 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2950,13 +2950,15 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) struct { struct perf_event_header header; u64 time; + u64 id; } throttle_event = { .header = { .type = PERF_EVENT_THROTTLE + 1, .misc = 0, .size = sizeof(throttle_event), }, - .time = sched_clock(), + .time = sched_clock(), + .id = counter->id, }; ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); -- cgit v1.2.3-70-g09d2 From 3c7b4e6b8be4c16f1e6e5c558e33b7ff0db2dfaf Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:39 +0100 Subject: kmemleak: Add the base support This patch adds the base support for the kernel memory leak detector. It traces the memory allocation/freeing in a way similar to the Boehm's conservative garbage collector, the difference being that the unreferenced objects are not freed but only shown in /sys/kernel/debug/kmemleak. Enabling this feature introduces an overhead to memory allocations. Signed-off-by: Catalin Marinas Cc: Ingo Molnar Acked-by: Pekka Enberg Cc: Andrew Morton Reviewed-by: Paul E. McKenney --- include/linux/kmemleak.h | 96 +++ init/main.c | 4 +- mm/kmemleak.c | 1498 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1597 insertions(+), 1 deletion(-) create mode 100644 include/linux/kmemleak.h create mode 100644 mm/kmemleak.c (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h new file mode 100644 index 00000000000..7796aed6cdd --- /dev/null +++ b/include/linux/kmemleak.h @@ -0,0 +1,96 @@ +/* + * include/linux/kmemleak.h + * + * Copyright (C) 2008 ARM Limited + * Written by Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __KMEMLEAK_H +#define __KMEMLEAK_H + +#ifdef CONFIG_DEBUG_KMEMLEAK + +extern void kmemleak_init(void); +extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp); +extern void kmemleak_free(const void *ptr); +extern void kmemleak_padding(const void *ptr, unsigned long offset, + size_t size); +extern void kmemleak_not_leak(const void *ptr); +extern void kmemleak_ignore(const void *ptr); +extern void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp); +extern void kmemleak_no_scan(const void *ptr); + +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_alloc(ptr, size, min_count, gfp); +} + +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_free(ptr); +} + +static inline void kmemleak_erase(void **ptr) +{ + *ptr = NULL; +} + +#else + +static inline void kmemleak_init(void) +{ +} +static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) +{ +} +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ +} +static inline void kmemleak_free(const void *ptr) +{ +} +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ +} +static inline void kmemleak_not_leak(const void *ptr) +{ +} +static inline void kmemleak_ignore(const void *ptr) +{ +} +static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp) +{ +} +static inline void kmemleak_erase(void **ptr) +{ +} +static inline void kmemleak_no_scan(const void *ptr) +{ +} + +#endif /* CONFIG_DEBUG_KMEMLEAK */ + +#endif /* __KMEMLEAK_H */ diff --git a/init/main.c b/init/main.c index bb7dc57eee3..9d759d68d7a 100644 --- a/init/main.c +++ b/init/main.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -603,6 +604,7 @@ asmlinkage void __init start_kernel(void) /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); + prio_tree_init(); pidhash_init(); init_timers(); hrtimers_init(); @@ -654,6 +656,7 @@ asmlinkage void __init start_kernel(void) cpu_hotplug_init(); kmem_cache_init(); kmemtrace_init(); + kmemleak_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); @@ -663,7 +666,6 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); pgtable_cache_init(); - prio_tree_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) diff --git a/mm/kmemleak.c b/mm/kmemleak.c new file mode 100644 index 00000000000..58ec86c9e58 --- /dev/null +++ b/mm/kmemleak.c @@ -0,0 +1,1498 @@ +/* + * mm/kmemleak.c + * + * Copyright (C) 2008 ARM Limited + * Written by Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * For more information on the algorithm and kmemleak usage, please see + * Documentation/kmemleak.txt. + * + * Notes on locking + * ---------------- + * + * The following locks and mutexes are used by kmemleak: + * + * - kmemleak_lock (rwlock): protects the object_list modifications and + * accesses to the object_tree_root. The object_list is the main list + * holding the metadata (struct kmemleak_object) for the allocated memory + * blocks. The object_tree_root is a priority search tree used to look-up + * metadata based on a pointer to the corresponding memory block. The + * kmemleak_object structures are added to the object_list and + * object_tree_root in the create_object() function called from the + * kmemleak_alloc() callback and removed in delete_object() called from the + * kmemleak_free() callback + * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to + * the metadata (e.g. count) are protected by this lock. Note that some + * members of this structure may be protected by other means (atomic or + * kmemleak_lock). This lock is also held when scanning the corresponding + * memory block to avoid the kernel freeing it via the kmemleak_free() + * callback. This is less heavyweight than holding a global lock like + * kmemleak_lock during scanning + * - scan_mutex (mutex): ensures that only one thread may scan the memory for + * unreferenced objects at a time. The gray_list contains the objects which + * are already referenced or marked as false positives and need to be + * scanned. This list is only modified during a scanning episode when the + * scan_mutex is held. At the end of a scan, the gray_list is always empty. + * Note that the kmemleak_object.use_count is incremented when an object is + * added to the gray_list and therefore cannot be freed + * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs + * file together with modifications to the memory scanning parameters + * including the scan_thread pointer + * + * The kmemleak_object structures have a use_count incremented or decremented + * using the get_object()/put_object() functions. When the use_count becomes + * 0, this count can no longer be incremented and put_object() schedules the + * kmemleak_object freeing via an RCU callback. All calls to the get_object() + * function must be protected by rcu_read_lock() to avoid accessing a freed + * structure. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Kmemleak configuration and common defines. + */ +#define MAX_TRACE 16 /* stack trace length */ +#define REPORTS_NR 50 /* maximum number of reported leaks */ +#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ +#define MSECS_SCAN_YIELD 10 /* CPU yielding period */ +#define SECS_FIRST_SCAN 60 /* delay before the first scan */ +#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ + +#define BYTES_PER_POINTER sizeof(void *) + +/* scanning area inside a memory block */ +struct kmemleak_scan_area { + struct hlist_node node; + unsigned long offset; + size_t length; +}; + +/* + * Structure holding the metadata for each allocated memory block. + * Modifications to such objects should be made while holding the + * object->lock. Insertions or deletions from object_list, gray_list or + * tree_node are already protected by the corresponding locks or mutex (see + * the notes on locking above). These objects are reference-counted + * (use_count) and freed using the RCU mechanism. + */ +struct kmemleak_object { + spinlock_t lock; + unsigned long flags; /* object status flags */ + struct list_head object_list; + struct list_head gray_list; + struct prio_tree_node tree_node; + struct rcu_head rcu; /* object_list lockless traversal */ + /* object usage count; object freed when use_count == 0 */ + atomic_t use_count; + unsigned long pointer; + size_t size; + /* minimum number of a pointers found before it is considered leak */ + int min_count; + /* the total number of pointers found pointing to this object */ + int count; + /* memory ranges to be scanned inside an object (empty for all) */ + struct hlist_head area_list; + unsigned long trace[MAX_TRACE]; + unsigned int trace_len; + unsigned long jiffies; /* creation timestamp */ + pid_t pid; /* pid of the current task */ + char comm[TASK_COMM_LEN]; /* executable name */ +}; + +/* flag representing the memory block allocation status */ +#define OBJECT_ALLOCATED (1 << 0) +/* flag set after the first reporting of an unreference object */ +#define OBJECT_REPORTED (1 << 1) +/* flag set to not scan the object */ +#define OBJECT_NO_SCAN (1 << 2) + +/* the list of all allocated objects */ +static LIST_HEAD(object_list); +/* the list of gray-colored objects (see color_gray comment below) */ +static LIST_HEAD(gray_list); +/* prio search tree for object boundaries */ +static struct prio_tree_root object_tree_root; +/* rw_lock protecting the access to object_list and prio_tree_root */ +static DEFINE_RWLOCK(kmemleak_lock); + +/* allocation caches for kmemleak internal data */ +static struct kmem_cache *object_cache; +static struct kmem_cache *scan_area_cache; + +/* set if tracing memory operations is enabled */ +static atomic_t kmemleak_enabled = ATOMIC_INIT(0); +/* set in the late_initcall if there were no errors */ +static atomic_t kmemleak_initialized = ATOMIC_INIT(0); +/* enables or disables early logging of the memory operations */ +static atomic_t kmemleak_early_log = ATOMIC_INIT(1); +/* set if a fata kmemleak error has occurred */ +static atomic_t kmemleak_error = ATOMIC_INIT(0); + +/* minimum and maximum address that may be valid pointers */ +static unsigned long min_addr = ULONG_MAX; +static unsigned long max_addr; + +/* used for yielding the CPU to other tasks during scanning */ +static unsigned long next_scan_yield; +static struct task_struct *scan_thread; +static unsigned long jiffies_scan_yield; +static unsigned long jiffies_min_age; +/* delay between automatic memory scannings */ +static signed long jiffies_scan_wait; +/* enables or disables the task stacks scanning */ +static int kmemleak_stack_scan; +/* mutex protecting the memory scanning */ +static DEFINE_MUTEX(scan_mutex); +/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ +static DEFINE_MUTEX(kmemleak_mutex); + +/* number of leaks reported (for limitation purposes) */ +static int reported_leaks; + +/* + * Early object allocation/freeing logging. Kkmemleak is initialized after the + * kernel allocator. However, both the kernel allocator and kmemleak may + * allocate memory blocks which need to be tracked. Kkmemleak defines an + * arbitrary buffer to hold the allocation/freeing information before it is + * fully initialized. + */ + +/* kmemleak operation type for early logging */ +enum { + KMEMLEAK_ALLOC, + KMEMLEAK_FREE, + KMEMLEAK_NOT_LEAK, + KMEMLEAK_IGNORE, + KMEMLEAK_SCAN_AREA, + KMEMLEAK_NO_SCAN +}; + +/* + * Structure holding the information passed to kmemleak callbacks during the + * early logging. + */ +struct early_log { + int op_type; /* kmemleak operation type */ + const void *ptr; /* allocated/freed memory block */ + size_t size; /* memory block size */ + int min_count; /* minimum reference count */ + unsigned long offset; /* scan area offset */ + size_t length; /* scan area length */ +}; + +/* early logging buffer and current position */ +static struct early_log early_log[200]; +static int crt_early_log; + +static void kmemleak_disable(void); + +/* + * Print a warning and dump the stack trace. + */ +#define kmemleak_warn(x...) do { \ + pr_warning(x); \ + dump_stack(); \ +} while (0) + +/* + * Macro invoked when a serious kmemleak condition occured and cannot be + * recovered from. Kkmemleak will be disabled and further allocation/freeing + * tracing no longer available. + */ +#define kmemleak_panic(x...) do { \ + kmemleak_warn(x); \ + kmemleak_disable(); \ +} while (0) + +/* + * Object colors, encoded with count and min_count: + * - white - orphan object, not enough references to it (count < min_count) + * - gray - not orphan, not marked as false positive (min_count == 0) or + * sufficient references to it (count >= min_count) + * - black - ignore, it doesn't contain references (e.g. text section) + * (min_count == -1). No function defined for this color. + * Newly created objects don't have any color assigned (object->count == -1) + * before the next memory scan when they become white. + */ +static int color_white(const struct kmemleak_object *object) +{ + return object->count != -1 && object->count < object->min_count; +} + +static int color_gray(const struct kmemleak_object *object) +{ + return object->min_count != -1 && object->count >= object->min_count; +} + +/* + * Objects are considered referenced if their color is gray and they have not + * been deleted. + */ +static int referenced_object(struct kmemleak_object *object) +{ + return (object->flags & OBJECT_ALLOCATED) && color_gray(object); +} + +/* + * Objects are considered unreferenced only if their color is white, they have + * not be deleted and have a minimum age to avoid false positives caused by + * pointers temporarily stored in CPU registers. + */ +static int unreferenced_object(struct kmemleak_object *object) +{ + return (object->flags & OBJECT_ALLOCATED) && color_white(object) && + time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); +} + +/* + * Printing of the (un)referenced objects information, either to the seq file + * or to the kernel log. The print_referenced/print_unreferenced functions + * must be called with the object->lock held. + */ +#define print_helper(seq, x...) do { \ + struct seq_file *s = (seq); \ + if (s) \ + seq_printf(s, x); \ + else \ + pr_info(x); \ +} while (0) + +static void print_referenced(struct kmemleak_object *object) +{ + pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n", + object->pointer, object->size); +} + +static void print_unreferenced(struct seq_file *seq, + struct kmemleak_object *object) +{ + int i; + + print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n", + object->pointer, object->size); + print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", + object->comm, object->pid, object->jiffies); + print_helper(seq, " backtrace:\n"); + + for (i = 0; i < object->trace_len; i++) { + void *ptr = (void *)object->trace[i]; + print_helper(seq, " [<%p>] %pS\n", ptr, ptr); + } +} + +/* + * Print the kmemleak_object information. This function is used mainly for + * debugging special cases when kmemleak operations. It must be called with + * the object->lock held. + */ +static void dump_object_info(struct kmemleak_object *object) +{ + struct stack_trace trace; + + trace.nr_entries = object->trace_len; + trace.entries = object->trace; + + pr_notice("kmemleak: Object 0x%08lx (size %zu):\n", + object->tree_node.start, object->size); + pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", + object->comm, object->pid, object->jiffies); + pr_notice(" min_count = %d\n", object->min_count); + pr_notice(" count = %d\n", object->count); + pr_notice(" backtrace:\n"); + print_stack_trace(&trace, 4); +} + +/* + * Look-up a memory block metadata (kmemleak_object) in the priority search + * tree based on a pointer value. If alias is 0, only values pointing to the + * beginning of the memory block are allowed. The kmemleak_lock must be held + * when calling this function. + */ +static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) +{ + struct prio_tree_node *node; + struct prio_tree_iter iter; + struct kmemleak_object *object; + + prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr); + node = prio_tree_next(&iter); + if (node) { + object = prio_tree_entry(node, struct kmemleak_object, + tree_node); + if (!alias && object->pointer != ptr) { + kmemleak_warn("kmemleak: Found object by alias"); + object = NULL; + } + } else + object = NULL; + + return object; +} + +/* + * Increment the object use_count. Return 1 if successful or 0 otherwise. Note + * that once an object's use_count reached 0, the RCU freeing was already + * registered and the object should no longer be used. This function must be + * called under the protection of rcu_read_lock(). + */ +static int get_object(struct kmemleak_object *object) +{ + return atomic_inc_not_zero(&object->use_count); +} + +/* + * RCU callback to free a kmemleak_object. + */ +static void free_object_rcu(struct rcu_head *rcu) +{ + struct hlist_node *elem, *tmp; + struct kmemleak_scan_area *area; + struct kmemleak_object *object = + container_of(rcu, struct kmemleak_object, rcu); + + /* + * Once use_count is 0 (guaranteed by put_object), there is no other + * code accessing this object, hence no need for locking. + */ + hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) { + hlist_del(elem); + kmem_cache_free(scan_area_cache, area); + } + kmem_cache_free(object_cache, object); +} + +/* + * Decrement the object use_count. Once the count is 0, free the object using + * an RCU callback. Since put_object() may be called via the kmemleak_free() -> + * delete_object() path, the delayed RCU freeing ensures that there is no + * recursive call to the kernel allocator. Lock-less RCU object_list traversal + * is also possible. + */ +static void put_object(struct kmemleak_object *object) +{ + if (!atomic_dec_and_test(&object->use_count)) + return; + + /* should only get here after delete_object was called */ + WARN_ON(object->flags & OBJECT_ALLOCATED); + + call_rcu(&object->rcu, free_object_rcu); +} + +/* + * Look up an object in the prio search tree and increase its use_count. + */ +static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) +{ + unsigned long flags; + struct kmemleak_object *object = NULL; + + rcu_read_lock(); + read_lock_irqsave(&kmemleak_lock, flags); + if (ptr >= min_addr && ptr < max_addr) + object = lookup_object(ptr, alias); + read_unlock_irqrestore(&kmemleak_lock, flags); + + /* check whether the object is still available */ + if (object && !get_object(object)) + object = NULL; + rcu_read_unlock(); + + return object; +} + +/* + * Create the metadata (struct kmemleak_object) corresponding to an allocated + * memory block and add it to the object_list and object_tree_root. + */ +static void create_object(unsigned long ptr, size_t size, int min_count, + gfp_t gfp) +{ + unsigned long flags; + struct kmemleak_object *object; + struct prio_tree_node *node; + struct stack_trace trace; + + object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK); + if (!object) { + kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object " + "structure\n"); + return; + } + + INIT_LIST_HEAD(&object->object_list); + INIT_LIST_HEAD(&object->gray_list); + INIT_HLIST_HEAD(&object->area_list); + spin_lock_init(&object->lock); + atomic_set(&object->use_count, 1); + object->flags = OBJECT_ALLOCATED; + object->pointer = ptr; + object->size = size; + object->min_count = min_count; + object->count = -1; /* no color initially */ + object->jiffies = jiffies; + + /* task information */ + if (in_irq()) { + object->pid = 0; + strncpy(object->comm, "hardirq", sizeof(object->comm)); + } else if (in_softirq()) { + object->pid = 0; + strncpy(object->comm, "softirq", sizeof(object->comm)); + } else { + object->pid = current->pid; + /* + * There is a small chance of a race with set_task_comm(), + * however using get_task_comm() here may cause locking + * dependency issues with current->alloc_lock. In the worst + * case, the command line is not correct. + */ + strncpy(object->comm, current->comm, sizeof(object->comm)); + } + + /* kernel backtrace */ + trace.max_entries = MAX_TRACE; + trace.nr_entries = 0; + trace.entries = object->trace; + trace.skip = 1; + save_stack_trace(&trace); + object->trace_len = trace.nr_entries; + + INIT_PRIO_TREE_NODE(&object->tree_node); + object->tree_node.start = ptr; + object->tree_node.last = ptr + size - 1; + + write_lock_irqsave(&kmemleak_lock, flags); + min_addr = min(min_addr, ptr); + max_addr = max(max_addr, ptr + size); + node = prio_tree_insert(&object_tree_root, &object->tree_node); + /* + * The code calling the kernel does not yet have the pointer to the + * memory block to be able to free it. However, we still hold the + * kmemleak_lock here in case parts of the kernel started freeing + * random memory blocks. + */ + if (node != &object->tree_node) { + unsigned long flags; + + kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object " + "search tree (already existing)\n", ptr); + object = lookup_object(ptr, 1); + spin_lock_irqsave(&object->lock, flags); + dump_object_info(object); + spin_unlock_irqrestore(&object->lock, flags); + + goto out; + } + list_add_tail_rcu(&object->object_list, &object_list); +out: + write_unlock_irqrestore(&kmemleak_lock, flags); +} + +/* + * Remove the metadata (struct kmemleak_object) for a memory block from the + * object_list and object_tree_root and decrement its use_count. + */ +static void delete_object(unsigned long ptr) +{ + unsigned long flags; + struct kmemleak_object *object; + + write_lock_irqsave(&kmemleak_lock, flags); + object = lookup_object(ptr, 0); + if (!object) { + kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n", + ptr); + write_unlock_irqrestore(&kmemleak_lock, flags); + return; + } + prio_tree_remove(&object_tree_root, &object->tree_node); + list_del_rcu(&object->object_list); + write_unlock_irqrestore(&kmemleak_lock, flags); + + WARN_ON(!(object->flags & OBJECT_ALLOCATED)); + WARN_ON(atomic_read(&object->use_count) < 1); + + /* + * Locking here also ensures that the corresponding memory block + * cannot be freed when it is being scanned. + */ + spin_lock_irqsave(&object->lock, flags); + if (object->flags & OBJECT_REPORTED) + print_referenced(object); + object->flags &= ~OBJECT_ALLOCATED; + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); +} + +/* + * Make a object permanently as gray-colored so that it can no longer be + * reported as a leak. This is used in general to mark a false positive. + */ +static void make_gray_object(unsigned long ptr) +{ + unsigned long flags; + struct kmemleak_object *object; + + object = find_and_get_object(ptr, 0); + if (!object) { + kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n", + ptr); + return; + } + + spin_lock_irqsave(&object->lock, flags); + object->min_count = 0; + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); +} + +/* + * Mark the object as black-colored so that it is ignored from scans and + * reporting. + */ +static void make_black_object(unsigned long ptr) +{ + unsigned long flags; + struct kmemleak_object *object; + + object = find_and_get_object(ptr, 0); + if (!object) { + kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n", + ptr); + return; + } + + spin_lock_irqsave(&object->lock, flags); + object->min_count = -1; + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); +} + +/* + * Add a scanning area to the object. If at least one such area is added, + * kmemleak will only scan these ranges rather than the whole memory block. + */ +static void add_scan_area(unsigned long ptr, unsigned long offset, + size_t length, gfp_t gfp) +{ + unsigned long flags; + struct kmemleak_object *object; + struct kmemleak_scan_area *area; + + object = find_and_get_object(ptr, 0); + if (!object) { + kmemleak_warn("kmemleak: Adding scan area to unknown " + "object at 0x%08lx\n", ptr); + return; + } + + area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK); + if (!area) { + kmemleak_warn("kmemleak: Cannot allocate a scan area\n"); + goto out; + } + + spin_lock_irqsave(&object->lock, flags); + if (offset + length > object->size) { + kmemleak_warn("kmemleak: Scan area larger than object " + "0x%08lx\n", ptr); + dump_object_info(object); + kmem_cache_free(scan_area_cache, area); + goto out_unlock; + } + + INIT_HLIST_NODE(&area->node); + area->offset = offset; + area->length = length; + + hlist_add_head(&area->node, &object->area_list); +out_unlock: + spin_unlock_irqrestore(&object->lock, flags); +out: + put_object(object); +} + +/* + * Set the OBJECT_NO_SCAN flag for the object corresponding to the give + * pointer. Such object will not be scanned by kmemleak but references to it + * are searched. + */ +static void object_no_scan(unsigned long ptr) +{ + unsigned long flags; + struct kmemleak_object *object; + + object = find_and_get_object(ptr, 0); + if (!object) { + kmemleak_warn("kmemleak: Not scanning unknown object at " + "0x%08lx\n", ptr); + return; + } + + spin_lock_irqsave(&object->lock, flags); + object->flags |= OBJECT_NO_SCAN; + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); +} + +/* + * Log an early kmemleak_* call to the early_log buffer. These calls will be + * processed later once kmemleak is fully initialized. + */ +static void log_early(int op_type, const void *ptr, size_t size, + int min_count, unsigned long offset, size_t length) +{ + unsigned long flags; + struct early_log *log; + + if (crt_early_log >= ARRAY_SIZE(early_log)) { + kmemleak_panic("kmemleak: Early log buffer exceeded\n"); + return; + } + + /* + * There is no need for locking since the kernel is still in UP mode + * at this stage. Disabling the IRQs is enough. + */ + local_irq_save(flags); + log = &early_log[crt_early_log]; + log->op_type = op_type; + log->ptr = ptr; + log->size = size; + log->min_count = min_count; + log->offset = offset; + log->length = length; + crt_early_log++; + local_irq_restore(flags); +} + +/* + * Memory allocation function callback. This function is called from the + * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, + * vmalloc etc.). + */ +void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) +{ + pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + create_object((unsigned long)ptr, size, min_count, gfp); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0); +} +EXPORT_SYMBOL_GPL(kmemleak_alloc); + +/* + * Memory freeing function callback. This function is called from the kernel + * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). + */ +void kmemleak_free(const void *ptr) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + delete_object((unsigned long)ptr); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(kmemleak_free); + +/* + * Mark an already allocated memory block as a false positive. This will cause + * the block to no longer be reported as leak and always be scanned. + */ +void kmemleak_not_leak(const void *ptr) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + make_gray_object((unsigned long)ptr); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0); +} +EXPORT_SYMBOL(kmemleak_not_leak); + +/* + * Ignore a memory block. This is usually done when it is known that the + * corresponding block is not a leak and does not contain any references to + * other allocated memory blocks. + */ +void kmemleak_ignore(const void *ptr) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + make_black_object((unsigned long)ptr); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0); +} +EXPORT_SYMBOL(kmemleak_ignore); + +/* + * Limit the range to be scanned in an allocated memory block. + */ +void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, + gfp_t gfp) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + add_scan_area((unsigned long)ptr, offset, length, gfp); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length); +} +EXPORT_SYMBOL(kmemleak_scan_area); + +/* + * Inform kmemleak not to scan the given memory block. + */ +void kmemleak_no_scan(const void *ptr) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + object_no_scan((unsigned long)ptr); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0); +} +EXPORT_SYMBOL(kmemleak_no_scan); + +/* + * Yield the CPU so that other tasks get a chance to run. The yielding is + * rate-limited to avoid excessive number of calls to the schedule() function + * during memory scanning. + */ +static void scan_yield(void) +{ + might_sleep(); + + if (time_is_before_eq_jiffies(next_scan_yield)) { + schedule(); + next_scan_yield = jiffies + jiffies_scan_yield; + } +} + +/* + * Memory scanning is a long process and it needs to be interruptable. This + * function checks whether such interrupt condition occured. + */ +static int scan_should_stop(void) +{ + if (!atomic_read(&kmemleak_enabled)) + return 1; + + /* + * This function may be called from either process or kthread context, + * hence the need to check for both stop conditions. + */ + if (current->mm) + return signal_pending(current); + else + return kthread_should_stop(); + + return 0; +} + +/* + * Scan a memory block (exclusive range) for valid pointers and add those + * found to the gray list. + */ +static void scan_block(void *_start, void *_end, + struct kmemleak_object *scanned) +{ + unsigned long *ptr; + unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); + unsigned long *end = _end - (BYTES_PER_POINTER - 1); + + for (ptr = start; ptr < end; ptr++) { + unsigned long flags; + unsigned long pointer = *ptr; + struct kmemleak_object *object; + + if (scan_should_stop()) + break; + + /* + * When scanning a memory block with a corresponding + * kmemleak_object, the CPU yielding is handled in the calling + * code since it holds the object->lock to avoid the block + * freeing. + */ + if (!scanned) + scan_yield(); + + object = find_and_get_object(pointer, 1); + if (!object) + continue; + if (object == scanned) { + /* self referenced, ignore */ + put_object(object); + continue; + } + + /* + * Avoid the lockdep recursive warning on object->lock being + * previously acquired in scan_object(). These locks are + * enclosed by scan_mutex. + */ + spin_lock_irqsave_nested(&object->lock, flags, + SINGLE_DEPTH_NESTING); + if (!color_white(object)) { + /* non-orphan, ignored or new */ + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); + continue; + } + + /* + * Increase the object's reference count (number of pointers + * to the memory block). If this count reaches the required + * minimum, the object's color will become gray and it will be + * added to the gray_list. + */ + object->count++; + if (color_gray(object)) + list_add_tail(&object->gray_list, &gray_list); + else + put_object(object); + spin_unlock_irqrestore(&object->lock, flags); + } +} + +/* + * Scan a memory block corresponding to a kmemleak_object. A condition is + * that object->use_count >= 1. + */ +static void scan_object(struct kmemleak_object *object) +{ + struct kmemleak_scan_area *area; + struct hlist_node *elem; + unsigned long flags; + + /* + * Once the object->lock is aquired, the corresponding memory block + * cannot be freed (the same lock is aquired in delete_object). + */ + spin_lock_irqsave(&object->lock, flags); + if (object->flags & OBJECT_NO_SCAN) + goto out; + if (!(object->flags & OBJECT_ALLOCATED)) + /* already freed object */ + goto out; + if (hlist_empty(&object->area_list)) + scan_block((void *)object->pointer, + (void *)(object->pointer + object->size), object); + else + hlist_for_each_entry(area, elem, &object->area_list, node) + scan_block((void *)(object->pointer + area->offset), + (void *)(object->pointer + area->offset + + area->length), object); +out: + spin_unlock_irqrestore(&object->lock, flags); +} + +/* + * Scan data sections and all the referenced memory blocks allocated via the + * kernel's standard allocators. This function must be called with the + * scan_mutex held. + */ +static void kmemleak_scan(void) +{ + unsigned long flags; + struct kmemleak_object *object, *tmp; + struct task_struct *task; + int i; + + /* prepare the kmemleak_object's */ + rcu_read_lock(); + list_for_each_entry_rcu(object, &object_list, object_list) { + spin_lock_irqsave(&object->lock, flags); +#ifdef DEBUG + /* + * With a few exceptions there should be a maximum of + * 1 reference to any object at this point. + */ + if (atomic_read(&object->use_count) > 1) { + pr_debug("kmemleak: object->use_count = %d\n", + atomic_read(&object->use_count)); + dump_object_info(object); + } +#endif + /* reset the reference count (whiten the object) */ + object->count = 0; + if (color_gray(object) && get_object(object)) + list_add_tail(&object->gray_list, &gray_list); + + spin_unlock_irqrestore(&object->lock, flags); + } + rcu_read_unlock(); + + /* data/bss scanning */ + scan_block(_sdata, _edata, NULL); + scan_block(__bss_start, __bss_stop, NULL); + +#ifdef CONFIG_SMP + /* per-cpu sections scanning */ + for_each_possible_cpu(i) + scan_block(__per_cpu_start + per_cpu_offset(i), + __per_cpu_end + per_cpu_offset(i), NULL); +#endif + + /* + * Struct page scanning for each node. The code below is not yet safe + * with MEMORY_HOTPLUG. + */ + for_each_online_node(i) { + pg_data_t *pgdat = NODE_DATA(i); + unsigned long start_pfn = pgdat->node_start_pfn; + unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; + unsigned long pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + struct page *page; + + if (!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + /* only scan if page is in use */ + if (page_count(page) == 0) + continue; + scan_block(page, page + 1, NULL); + } + } + + /* + * Scanning the task stacks may introduce false negatives and it is + * not enabled by default. + */ + if (kmemleak_stack_scan) { + read_lock(&tasklist_lock); + for_each_process(task) + scan_block(task_stack_page(task), + task_stack_page(task) + THREAD_SIZE, NULL); + read_unlock(&tasklist_lock); + } + + /* + * Scan the objects already referenced from the sections scanned + * above. More objects will be referenced and, if there are no memory + * leaks, all the objects will be scanned. The list traversal is safe + * for both tail additions and removals from inside the loop. The + * kmemleak objects cannot be freed from outside the loop because their + * use_count was increased. + */ + object = list_entry(gray_list.next, typeof(*object), gray_list); + while (&object->gray_list != &gray_list) { + scan_yield(); + + /* may add new objects to the list */ + if (!scan_should_stop()) + scan_object(object); + + tmp = list_entry(object->gray_list.next, typeof(*object), + gray_list); + + /* remove the object from the list and release it */ + list_del(&object->gray_list); + put_object(object); + + object = tmp; + } + WARN_ON(!list_empty(&gray_list)); +} + +/* + * Thread function performing automatic memory scanning. Unreferenced objects + * at the end of a memory scan are reported but only the first time. + */ +static int kmemleak_scan_thread(void *arg) +{ + static int first_run = 1; + + pr_info("kmemleak: Automatic memory scanning thread started\n"); + + /* + * Wait before the first scan to allow the system to fully initialize. + */ + if (first_run) { + first_run = 0; + ssleep(SECS_FIRST_SCAN); + } + + while (!kthread_should_stop()) { + struct kmemleak_object *object; + signed long timeout = jiffies_scan_wait; + + mutex_lock(&scan_mutex); + + kmemleak_scan(); + reported_leaks = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(object, &object_list, object_list) { + unsigned long flags; + + if (reported_leaks >= REPORTS_NR) + break; + spin_lock_irqsave(&object->lock, flags); + if (!(object->flags & OBJECT_REPORTED) && + unreferenced_object(object)) { + print_unreferenced(NULL, object); + object->flags |= OBJECT_REPORTED; + reported_leaks++; + } else if ((object->flags & OBJECT_REPORTED) && + referenced_object(object)) { + print_referenced(object); + object->flags &= ~OBJECT_REPORTED; + } + spin_unlock_irqrestore(&object->lock, flags); + } + rcu_read_unlock(); + + mutex_unlock(&scan_mutex); + /* wait before the next scan */ + while (timeout && !kthread_should_stop()) + timeout = schedule_timeout_interruptible(timeout); + } + + pr_info("kmemleak: Automatic memory scanning thread ended\n"); + + return 0; +} + +/* + * Start the automatic memory scanning thread. This function must be called + * with the kmemleak_mutex held. + */ +void start_scan_thread(void) +{ + if (scan_thread) + return; + scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak"); + if (IS_ERR(scan_thread)) { + pr_warning("kmemleak: Failed to create the scan thread\n"); + scan_thread = NULL; + } +} + +/* + * Stop the automatic memory scanning thread. This function must be called + * with the kmemleak_mutex held. + */ +void stop_scan_thread(void) +{ + if (scan_thread) { + kthread_stop(scan_thread); + scan_thread = NULL; + } +} + +/* + * Iterate over the object_list and return the first valid object at or after + * the required position with its use_count incremented. The function triggers + * a memory scanning when the pos argument points to the first position. + */ +static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct kmemleak_object *object; + loff_t n = *pos; + + if (!n) { + kmemleak_scan(); + reported_leaks = 0; + } + if (reported_leaks >= REPORTS_NR) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(object, &object_list, object_list) { + if (n-- > 0) + continue; + if (get_object(object)) + goto out; + } + object = NULL; +out: + rcu_read_unlock(); + return object; +} + +/* + * Return the next object in the object_list. The function decrements the + * use_count of the previous object and increases that of the next one. + */ +static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct kmemleak_object *prev_obj = v; + struct kmemleak_object *next_obj = NULL; + struct list_head *n = &prev_obj->object_list; + + ++(*pos); + if (reported_leaks >= REPORTS_NR) + goto out; + + rcu_read_lock(); + list_for_each_continue_rcu(n, &object_list) { + next_obj = list_entry(n, struct kmemleak_object, object_list); + if (get_object(next_obj)) + break; + } + rcu_read_unlock(); +out: + put_object(prev_obj); + return next_obj; +} + +/* + * Decrement the use_count of the last object required, if any. + */ +static void kmemleak_seq_stop(struct seq_file *seq, void *v) +{ + if (v) + put_object(v); +} + +/* + * Print the information for an unreferenced object to the seq file. + */ +static int kmemleak_seq_show(struct seq_file *seq, void *v) +{ + struct kmemleak_object *object = v; + unsigned long flags; + + spin_lock_irqsave(&object->lock, flags); + if (!unreferenced_object(object)) + goto out; + print_unreferenced(seq, object); + reported_leaks++; +out: + spin_unlock_irqrestore(&object->lock, flags); + return 0; +} + +static const struct seq_operations kmemleak_seq_ops = { + .start = kmemleak_seq_start, + .next = kmemleak_seq_next, + .stop = kmemleak_seq_stop, + .show = kmemleak_seq_show, +}; + +static int kmemleak_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if (!atomic_read(&kmemleak_enabled)) + return -EBUSY; + + ret = mutex_lock_interruptible(&kmemleak_mutex); + if (ret < 0) + goto out; + if (file->f_mode & FMODE_READ) { + ret = mutex_lock_interruptible(&scan_mutex); + if (ret < 0) + goto kmemleak_unlock; + ret = seq_open(file, &kmemleak_seq_ops); + if (ret < 0) + goto scan_unlock; + } + return ret; + +scan_unlock: + mutex_unlock(&scan_mutex); +kmemleak_unlock: + mutex_unlock(&kmemleak_mutex); +out: + return ret; +} + +static int kmemleak_release(struct inode *inode, struct file *file) +{ + int ret = 0; + + if (file->f_mode & FMODE_READ) { + seq_release(inode, file); + mutex_unlock(&scan_mutex); + } + mutex_unlock(&kmemleak_mutex); + + return ret; +} + +/* + * File write operation to configure kmemleak at run-time. The following + * commands can be written to the /sys/kernel/debug/kmemleak file: + * off - disable kmemleak (irreversible) + * stack=on - enable the task stacks scanning + * stack=off - disable the tasks stacks scanning + * scan=on - start the automatic memory scanning thread + * scan=off - stop the automatic memory scanning thread + * scan=... - set the automatic memory scanning period in seconds (0 to + * disable it) + */ +static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, + size_t size, loff_t *ppos) +{ + char buf[64]; + int buf_size; + + if (!atomic_read(&kmemleak_enabled)) + return -EBUSY; + + buf_size = min(size, (sizeof(buf) - 1)); + if (strncpy_from_user(buf, user_buf, buf_size) < 0) + return -EFAULT; + buf[buf_size] = 0; + + if (strncmp(buf, "off", 3) == 0) + kmemleak_disable(); + else if (strncmp(buf, "stack=on", 8) == 0) + kmemleak_stack_scan = 1; + else if (strncmp(buf, "stack=off", 9) == 0) + kmemleak_stack_scan = 0; + else if (strncmp(buf, "scan=on", 7) == 0) + start_scan_thread(); + else if (strncmp(buf, "scan=off", 8) == 0) + stop_scan_thread(); + else if (strncmp(buf, "scan=", 5) == 0) { + unsigned long secs; + int err; + + err = strict_strtoul(buf + 5, 0, &secs); + if (err < 0) + return err; + stop_scan_thread(); + if (secs) { + jiffies_scan_wait = msecs_to_jiffies(secs * 1000); + start_scan_thread(); + } + } else + return -EINVAL; + + /* ignore the rest of the buffer, only one command at a time */ + *ppos += size; + return size; +} + +static const struct file_operations kmemleak_fops = { + .owner = THIS_MODULE, + .open = kmemleak_open, + .read = seq_read, + .write = kmemleak_write, + .llseek = seq_lseek, + .release = kmemleak_release, +}; + +/* + * Perform the freeing of the kmemleak internal objects after waiting for any + * current memory scan to complete. + */ +static int kmemleak_cleanup_thread(void *arg) +{ + struct kmemleak_object *object; + + mutex_lock(&kmemleak_mutex); + stop_scan_thread(); + mutex_unlock(&kmemleak_mutex); + + mutex_lock(&scan_mutex); + rcu_read_lock(); + list_for_each_entry_rcu(object, &object_list, object_list) + delete_object(object->pointer); + rcu_read_unlock(); + mutex_unlock(&scan_mutex); + + return 0; +} + +/* + * Start the clean-up thread. + */ +static void kmemleak_cleanup(void) +{ + struct task_struct *cleanup_thread; + + cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL, + "kmemleak-clean"); + if (IS_ERR(cleanup_thread)) + pr_warning("kmemleak: Failed to create the clean-up thread\n"); +} + +/* + * Disable kmemleak. No memory allocation/freeing will be traced once this + * function is called. Disabling kmemleak is an irreversible operation. + */ +static void kmemleak_disable(void) +{ + /* atomically check whether it was already invoked */ + if (atomic_cmpxchg(&kmemleak_error, 0, 1)) + return; + + /* stop any memory operation tracing */ + atomic_set(&kmemleak_early_log, 0); + atomic_set(&kmemleak_enabled, 0); + + /* check whether it is too early for a kernel thread */ + if (atomic_read(&kmemleak_initialized)) + kmemleak_cleanup(); + + pr_info("Kernel memory leak detector disabled\n"); +} + +/* + * Allow boot-time kmemleak disabling (enabled by default). + */ +static int kmemleak_boot_config(char *str) +{ + if (!str) + return -EINVAL; + if (strcmp(str, "off") == 0) + kmemleak_disable(); + else if (strcmp(str, "on") != 0) + return -EINVAL; + return 0; +} +early_param("kmemleak", kmemleak_boot_config); + +/* + * Kkmemleak initialization. + */ +void __init kmemleak_init(void) +{ + int i; + unsigned long flags; + + jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD); + jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); + jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); + + object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE); + scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE); + INIT_PRIO_TREE_ROOT(&object_tree_root); + + /* the kernel is still in UP mode, so disabling the IRQs is enough */ + local_irq_save(flags); + if (!atomic_read(&kmemleak_error)) { + atomic_set(&kmemleak_enabled, 1); + atomic_set(&kmemleak_early_log, 0); + } + local_irq_restore(flags); + + /* + * This is the point where tracking allocations is safe. Automatic + * scanning is started during the late initcall. Add the early logged + * callbacks to the kmemleak infrastructure. + */ + for (i = 0; i < crt_early_log; i++) { + struct early_log *log = &early_log[i]; + + switch (log->op_type) { + case KMEMLEAK_ALLOC: + kmemleak_alloc(log->ptr, log->size, log->min_count, + GFP_KERNEL); + break; + case KMEMLEAK_FREE: + kmemleak_free(log->ptr); + break; + case KMEMLEAK_NOT_LEAK: + kmemleak_not_leak(log->ptr); + break; + case KMEMLEAK_IGNORE: + kmemleak_ignore(log->ptr); + break; + case KMEMLEAK_SCAN_AREA: + kmemleak_scan_area(log->ptr, log->offset, log->length, + GFP_KERNEL); + break; + case KMEMLEAK_NO_SCAN: + kmemleak_no_scan(log->ptr); + break; + default: + WARN_ON(1); + } + } +} + +/* + * Late initialization function. + */ +static int __init kmemleak_late_init(void) +{ + struct dentry *dentry; + + atomic_set(&kmemleak_initialized, 1); + + if (atomic_read(&kmemleak_error)) { + /* + * Some error occured and kmemleak was disabled. There is a + * small chance that kmemleak_disable() was called immediately + * after setting kmemleak_initialized and we may end up with + * two clean-up threads but serialized by scan_mutex. + */ + kmemleak_cleanup(); + return -ENOMEM; + } + + dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL, + &kmemleak_fops); + if (!dentry) + pr_warning("kmemleak: Failed to create the debugfs kmemleak " + "file\n"); + mutex_lock(&kmemleak_mutex); + start_scan_thread(); + mutex_unlock(&kmemleak_mutex); + + pr_info("Kernel memory leak detector initialized\n"); + + return 0; +} +late_initcall(kmemleak_late_init); -- cgit v1.2.3-70-g09d2 From d5cff635290aec9ad7e6ee546aa4fae895361cbb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:40 +0100 Subject: kmemleak: Add the slab memory allocation/freeing hooks This patch adds the callbacks to kmemleak_(alloc|free) functions from the slab allocator. The patch also adds the SLAB_NOLEAKTRACE flag to avoid recursive calls to kmemleak when it allocates its own data structures. Signed-off-by: Catalin Marinas Reviewed-by: Pekka Enberg --- include/linux/slab.h | 2 ++ mm/slab.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 24c5602bee9..48803064ced 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -62,6 +62,8 @@ # define SLAB_DEBUG_OBJECTS 0x00000000UL #endif +#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/mm/slab.c b/mm/slab.c index f85831da908..859067f8e4f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -178,13 +179,13 @@ SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ - SLAB_DEBUG_OBJECTS) + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) #else # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ - SLAB_DEBUG_OBJECTS) + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) #endif /* @@ -964,6 +965,14 @@ static struct array_cache *alloc_arraycache(int node, int entries, struct array_cache *nc = NULL; nc = kmalloc_node(memsize, GFP_KERNEL, node); + /* + * The array_cache structures contain pointers to free object. + * However, when such objects are allocated or transfered to another + * cache the pointers are not cleared and they could be counted as + * valid references during a kmemleak scan. Therefore, kmemleak must + * not scan such objects. + */ + kmemleak_no_scan(nc); if (nc) { nc->avail = 0; nc->limit = entries; @@ -2621,6 +2630,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, /* Slab management obj is off-slab. */ slabp = kmem_cache_alloc_node(cachep->slabp_cache, local_flags, nodeid); + /* + * If the first object in the slab is leaked (it's allocated + * but no one has a reference to it), we want to make sure + * kmemleak does not treat the ->s_mem pointer as a reference + * to the object. Otherwise we will not report the leak. + */ + kmemleak_scan_area(slabp, offsetof(struct slab, list), + sizeof(struct list_head), local_flags); if (!slabp) return NULL; } else { @@ -3141,6 +3158,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags); } + /* + * To avoid a false negative, if an object that is in one of the + * per-CPU caches is leaked, we need to make sure kmemleak doesn't + * treat the array pointers as a reference to the object. + */ + kmemleak_erase(&ac->entry[ac->avail]); return objp; } @@ -3360,6 +3383,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, + flags); if (unlikely((flags & __GFP_ZERO) && ptr)) memset(ptr, 0, obj_size(cachep)); @@ -3415,6 +3440,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); + kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, + flags); prefetchw(objp); if (unlikely((flags & __GFP_ZERO) && objp)) @@ -3530,6 +3557,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); + kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); /* -- cgit v1.2.3-70-g09d2 From 2e1483c995bbd0fa6cbd055ad76088a520799ba4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:24:13 +0100 Subject: kmemleak: Remove some of the kmemleak false positives There are allocations for which the main pointer cannot be found but they are not memory leaks. This patch fixes some of them. For more information on false positives, see Documentation/kmemleak.txt. Signed-off-by: Catalin Marinas --- drivers/char/vt.c | 7 +++++++ fs/block_dev.c | 6 ++++++ include/linux/percpu.h | 5 +++++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 08151d4de48..961c1a788c6 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -104,6 +104,7 @@ #include #include #include +#include #define MAX_NR_CON_DRIVER 16 @@ -2880,6 +2881,12 @@ static int __init con_init(void) */ for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data)); + /* + * Kmemleak does not track the memory allocated via + * alloc_bootmem() but this block contains pointers to + * other blocks allocated via kmalloc. + */ + kmemleak_alloc(vc, sizeof(struct vc_data), 1, GFP_ATOMIC); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size); diff --git a/fs/block_dev.c b/fs/block_dev.c index f45dbc18dd1..d250f807fd8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "internal.h" @@ -492,6 +493,11 @@ void __init bdev_cache_init(void) bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); + /* + * This vfsmount structure is only used to obtain the + * blockdev_superblock, so tell kmemleak not to report it. + */ + kmemleak_not_leak(bd_mnt); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1581ff235c7..26fd9d12f05 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -86,7 +86,12 @@ struct percpu_data { void *ptrs[1]; }; +/* pointer disguising messes up the kmemleak objects tracking */ +#ifndef CONFIG_DEBUG_KMEMLEAK #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +#else +#define __percpu_disguise(pdata) (struct percpu_data *)(pdata) +#endif #define per_cpu_ptr(ptr, cpu) \ ({ \ -- cgit v1.2.3-70-g09d2 From 38c7fed2f5ffee17e1fa3e0f78b0e1bf43d52d13 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 May 2009 15:10:58 +0300 Subject: x86: remove some alloc_bootmem_cpumask_var calling Now that we set up the slab allocator earlier, we can get rid of some alloc_bootmem_cpumask_var() calls in boot code. Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- arch/x86/kernel/apic/io_apic.c | 4 ++-- include/linux/irq.h | 18 +++++++----------- kernel/cpuset.c | 2 +- kernel/profile.c | 6 ------ lib/cpumask.c | 11 ++--------- 5 files changed, 12 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1946fac42ab..139201a562b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -185,8 +185,8 @@ int __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); + alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT); + alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT); if (i < NR_IRQS_LEGACY) cpumask_setall(cfg[i].domain); } diff --git a/include/linux/irq.h b/include/linux/irq.h index eedbb8e5e0c..1e50c34f006 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Returns true if successful (or not required). */ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) + bool boot) { -#ifdef CONFIG_CPUMASK_OFFSTACK - if (boot) { - alloc_bootmem_cpumask_var(&desc->affinity); + gfp_t gfp = GFP_ATOMIC; -#ifdef CONFIG_GENERIC_PENDING_IRQ - alloc_bootmem_cpumask_var(&desc->pending_mask); -#endif - return true; - } + if (boot) + gfp = GFP_NOWAIT; - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { free_cpumask_var(desc->affinity); return false; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 026faccca86..d5a7e17474e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = { int __init cpuset_init_early(void) { - alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); + alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT); top_cpuset.mems_generation = cpuset_mems_generation++; return 0; diff --git a/kernel/profile.c b/kernel/profile.c index 7724e0409ba..28cf26ad2d2 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -111,12 +111,6 @@ int __ref profile_init(void) /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; buffer_bytes = prof_len*sizeof(atomic_t); - if (!slab_is_available()) { - prof_buffer = alloc_bootmem(buffer_bytes); - alloc_bootmem_cpumask_var(&prof_cpu_mask); - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - return 0; - } if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; diff --git a/lib/cpumask.c b/lib/cpumask.c index eb23aaa0c7b..7bb4142a502 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -92,15 +92,8 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { - if (likely(slab_is_available())) - *mask = kmalloc_node(cpumask_size(), flags, node); - else { -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - printk(KERN_ERR - "=> alloc_cpumask_var: kmalloc not available!\n"); -#endif - *mask = NULL; - } + *mask = kmalloc_node(cpumask_size(), flags, node); + #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (!*mask) { printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); -- cgit v1.2.3-70-g09d2 From 670025478c2a687453cd1bac697d7d765843f59d Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Thu, 11 Jun 2009 15:52:10 +0200 Subject: mISDN: cleanup mISDNhw.h Remove unused stuff. Signed-off-by: Karsten Keil --- include/linux/mISDNhw.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index ce900f4c245..7f9831da847 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -89,11 +89,6 @@ struct dchannel { void (*phfunc) (struct dchannel *); u_int state; void *l1; - /* HW access */ - u_char (*read_reg) (void *, u_char); - void (*write_reg) (void *, u_char, u_char); - void (*read_fifo) (void *, u_char *, int); - void (*write_fifo) (void *, u_char *, int); void *hw; int slot; /* multiport card channel slot */ struct timer_list timer; @@ -151,11 +146,6 @@ struct bchannel { u_long Flags; struct work_struct workq; u_int state; - /* HW access */ - u_char (*read_reg) (void *, u_char); - void (*write_reg) (void *, u_char, u_char); - void (*read_fifo) (void *, u_char *, int); - void (*write_fifo) (void *, u_char *, int); void *hw; int slot; /* multiport card channel slot */ struct timer_list timer; -- cgit v1.2.3-70-g09d2 From 90586523eb4b349806887c62ee70685a49415124 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:20 -0400 Subject: fsnotify: unified filesystem notification backend fsnotify is a backend for filesystem notification. fsnotify does not provide any userspace interface but does provide the basis needed for other notification schemes such as dnotify. fsnotify can be extended to be the backend for inotify or the upcoming fanotify. fsnotify provides a mechanism for "groups" to register for some set of filesystem events and to then deliver those events to those groups for processing. fsnotify has a number of benefits, the first being actually shrinking the size of an inode. Before fsnotify to support both dnotify and inotify an inode had unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ struct list_head inotify_watches; /* watches on this inode */ struct mutex inotify_mutex; /* protects the watches list But with fsnotify this same functionallity (and more) is done with just __u32 i_fsnotify_mask; /* all events for this inode */ struct hlist_head i_fsnotify_mark_entries; /* marks on this inode */ That's right, inotify, dnotify, and fanotify all in 64 bits. We used that much space just in inotify_watches alone, before this patch set. fsnotify object lifetime and locking is MUCH better than what we have today. inotify locking is incredibly complex. See 8f7b0ba1c8539 as an example of what's been busted since inception. inotify needs to know internal semantics of superblock destruction and unmounting to function. The inode pinning and vfs contortions are horrible. no fsnotify implementers do allocation under locks. This means things like f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to GFP_NOFS can be reverted. There are no longer any allocation rules when using or implementing your own fsnotify listener. fsnotify paves the way for fanotify. In brief fanotify is a notification mechanism that delivers the lisener both an 'event' and an open file descriptor to the object in question. This means that fanotify is pathname agnostic. Some on lkml may not care for the original companies or users that pushed for TALPA, but fanotify was designed with flexibility and input for other users in mind. The readahead group expressed interest in fanotify as it could be used to profile disk access on boot without breaking the audit system. The desktop search groups have also expressed interest in fanotify as it solves a number of the race conditions and problems present with managing inotify when more than a limited number of specific files are of interest. fanotify can provide for a userspace access control system which makes it a clean interface for AV vendors to hook without trying to do binary patching on the syscall table, LSM, and everywhere else they do their things today. With this patch series fanotify can be implemented in less than 1200 lines of easy to review code. Almost all of which is the socket based user interface. This patch series builds fsnotify to the point that it can implement dnotify and inotify_user. Patches exist and will be sent soon after acceptance to finish the in kernel inotify conversion (audit) and implement fanotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/Kconfig | 13 +++ fs/notify/Makefile | 2 + fs/notify/fsnotify.c | 79 ++++++++++++++++ fs/notify/fsnotify.h | 15 +++ fs/notify/group.c | 198 +++++++++++++++++++++++++++++++++++++++ fs/notify/inotify/inotify.c | 20 ++++ fs/notify/notification.c | 121 ++++++++++++++++++++++++ include/linux/fsnotify.h | 115 ++++++++++++++++------- include/linux/fsnotify_backend.h | 177 ++++++++++++++++++++++++++++++++++ 9 files changed, 705 insertions(+), 35 deletions(-) create mode 100644 fs/notify/fsnotify.c create mode 100644 fs/notify/fsnotify.h create mode 100644 fs/notify/group.c create mode 100644 fs/notify/notification.c create mode 100644 include/linux/fsnotify_backend.h (limited to 'include') diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 50914d7303c..31dac7e3b0f 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -1,2 +1,15 @@ +config FSNOTIFY + bool "Filesystem notification backend" + default y + ---help--- + fsnotify is a backend for filesystem notification. fsnotify does + not provide any userspace interface but does provide the basis + needed for other notification schemes such as dnotify, inotify, + and fanotify. + + Say Y here to enable fsnotify suport. + + If unsure, say Y. + source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 5a95b6010ce..db5467b5b58 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,2 +1,4 @@ +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o + obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c new file mode 100644 index 00000000000..56bee0f10c3 --- /dev/null +++ b/fs/notify/fsnotify.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include +#include "fsnotify.h" + +/* + * This is the main call to fsnotify. The VFS calls into hook specific functions + * in linux/fsnotify.h. Those functions then in turn call here. Here will call + * out to all of the registered fsnotify_group. Those groups can then use the + * notification event in whatever means they feel necessary. + */ +void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +{ + struct fsnotify_group *group; + struct fsnotify_event *event = NULL; + int idx; + + if (list_empty(&fsnotify_groups)) + return; + + if (!(mask & fsnotify_mask)) + return; + + /* + * SRCU!! the groups list is very very much read only and the path is + * very hot. The VAST majority of events are not going to need to do + * anything other than walk the list so it's crazy to pre-allocate. + */ + idx = srcu_read_lock(&fsnotify_grp_srcu); + list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { + if (mask & group->mask) { + if (!event) { + event = fsnotify_create_event(to_tell, mask, data, data_is); + /* shit, we OOM'd and now we can't tell, maybe + * someday someone else will want to do something + * here */ + if (!event) + break; + } + group->ops->handle_event(group, event); + } + } + srcu_read_unlock(&fsnotify_grp_srcu, idx); + /* + * fsnotify_create_event() took a reference so the event can't be cleaned + * up while we are still trying to add it to lists, drop that one. + */ + if (event) + fsnotify_put_event(event); +} +EXPORT_SYMBOL_GPL(fsnotify); + +static __init int fsnotify_init(void) +{ + return init_srcu_struct(&fsnotify_grp_srcu); +} +subsys_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h new file mode 100644 index 00000000000..c6a8bd47657 --- /dev/null +++ b/fs/notify/fsnotify.h @@ -0,0 +1,15 @@ +#ifndef __FS_NOTIFY_FSNOTIFY_H_ +#define __FS_NOTIFY_FSNOTIFY_H_ + +#include +#include +#include +#include + +/* protects reads of fsnotify_groups */ +extern struct srcu_struct fsnotify_grp_srcu; +/* all groups which receive fsnotify events */ +extern struct list_head fsnotify_groups; +/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ +extern __u32 fsnotify_mask; +#endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c new file mode 100644 index 00000000000..c6812953b96 --- /dev/null +++ b/fs/notify/group.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "fsnotify.h" + +#include + +/* protects writes to fsnotify_groups and fsnotify_mask */ +static DEFINE_MUTEX(fsnotify_grp_mutex); +/* protects reads while running the fsnotify_groups list */ +struct srcu_struct fsnotify_grp_srcu; +/* all groups registered to receive filesystem notifications */ +LIST_HEAD(fsnotify_groups); +/* bitwise OR of all events (FS_*) interesting to some group on this system */ +__u32 fsnotify_mask; + +/* + * When a new group registers or changes it's set of interesting events + * this function updates the fsnotify_mask to contain all interesting events + */ +void fsnotify_recalc_global_mask(void) +{ + struct fsnotify_group *group; + __u32 mask = 0; + int idx; + + idx = srcu_read_lock(&fsnotify_grp_srcu); + list_for_each_entry_rcu(group, &fsnotify_groups, group_list) + mask |= group->mask; + srcu_read_unlock(&fsnotify_grp_srcu, idx); + fsnotify_mask = mask; +} + +/* + * Take a reference to a group so things found under the fsnotify_grp_mutex + * can't get freed under us + */ +static void fsnotify_get_group(struct fsnotify_group *group) +{ + atomic_inc(&group->refcnt); +} + +/* + * Final freeing of a group + */ +static void fsnotify_destroy_group(struct fsnotify_group *group) +{ + if (group->ops->free_group_priv) + group->ops->free_group_priv(group); + + kfree(group); +} + +/* + * Remove this group from the global list of groups that will get events + * this can be done even if there are still references and things still using + * this group. This just stops the group from getting new events. + */ +static void __fsnotify_evict_group(struct fsnotify_group *group) +{ + BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); + + if (group->on_group_list) + list_del_rcu(&group->group_list); + group->on_group_list = 0; +} + +/* + * Called when a group is no longer interested in getting events. This can be + * used if a group is misbehaving or if for some reason a group should no longer + * get any filesystem events. + */ +void fsnotify_evict_group(struct fsnotify_group *group) +{ + mutex_lock(&fsnotify_grp_mutex); + __fsnotify_evict_group(group); + mutex_unlock(&fsnotify_grp_mutex); +} + +/* + * Drop a reference to a group. Free it if it's through. + */ +void fsnotify_put_group(struct fsnotify_group *group) +{ + if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex)) + return; + + /* + * OK, now we know that there's no other users *and* we hold mutex, + * so no new references will appear + */ + __fsnotify_evict_group(group); + + /* + * now it's off the list, so the only thing we might care about is + * srcu access.... + */ + mutex_unlock(&fsnotify_grp_mutex); + synchronize_srcu(&fsnotify_grp_srcu); + + /* and now it is really dead. _Nothing_ could be seeing it */ + fsnotify_recalc_global_mask(); + fsnotify_destroy_group(group); +} + +/* + * Simply run the fsnotify_groups list and find a group which matches + * the given parameters. If a group is found we take a reference to that + * group. + */ +static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask, + const struct fsnotify_ops *ops) +{ + struct fsnotify_group *group_iter; + struct fsnotify_group *group = NULL; + + BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); + + list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) { + if (group_iter->group_num == group_num) { + if ((group_iter->mask == mask) && + (group_iter->ops == ops)) { + fsnotify_get_group(group_iter); + group = group_iter; + } else + group = ERR_PTR(-EEXIST); + } + } + return group; +} + +/* + * Either finds an existing group which matches the group_num, mask, and ops or + * creates a new group and adds it to the global group list. In either case we + * take a reference for the group returned. + */ +struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, + const struct fsnotify_ops *ops) +{ + struct fsnotify_group *group, *tgroup; + + /* very low use, simpler locking if we just always alloc */ + group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + atomic_set(&group->refcnt, 1); + + group->on_group_list = 0; + group->group_num = group_num; + group->mask = mask; + + group->ops = ops; + + mutex_lock(&fsnotify_grp_mutex); + tgroup = fsnotify_find_group(group_num, mask, ops); + if (tgroup) { + /* group already exists */ + mutex_unlock(&fsnotify_grp_mutex); + /* destroy the new one we made */ + fsnotify_put_group(group); + return tgroup; + } + + /* group not found, add a new one */ + list_add_rcu(&group->group_list, &fsnotify_groups); + group->on_group_list = 1; + + mutex_unlock(&fsnotify_grp_mutex); + + if (mask) + fsnotify_recalc_global_mask(); + + return group; +} diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 220c13f0d73..40b1cf914cc 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -32,6 +32,7 @@ #include #include #include +#include static atomic_t inotify_cookie; @@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch); */ static int __init inotify_setup(void) { + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); + BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); + BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); + BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); + BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); + BUILD_BUG_ON(IN_OPEN != FS_OPEN); + BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); + BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); + BUILD_BUG_ON(IN_CREATE != FS_CREATE); + BUILD_BUG_ON(IN_DELETE != FS_DELETE); + BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); + BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); + BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); + + BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); + BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); + BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); + atomic_set(&inotify_cookie, 0); return 0; diff --git a/fs/notify/notification.c b/fs/notify/notification.c new file mode 100644 index 00000000000..b8e9a87f8f5 --- /dev/null +++ b/fs/notify/notification.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "fsnotify.h" + +static struct kmem_cache *fsnotify_event_cachep; + +void fsnotify_get_event(struct fsnotify_event *event) +{ + atomic_inc(&event->refcnt); +} + +void fsnotify_put_event(struct fsnotify_event *event) +{ + if (!event) + return; + + if (atomic_dec_and_test(&event->refcnt)) { + if (event->data_type == FSNOTIFY_EVENT_PATH) + path_put(&event->path); + + kmem_cache_free(fsnotify_event_cachep, event); + } +} + +/* + * Allocate a new event which will be sent to each group's handle_event function + * if the group was interested in this particular event. + */ +struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_type) +{ + struct fsnotify_event *event; + + event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); + if (!event) + return NULL; + + atomic_set(&event->refcnt, 1); + + spin_lock_init(&event->lock); + + event->path.dentry = NULL; + event->path.mnt = NULL; + event->inode = NULL; + + event->to_tell = to_tell; + + switch (data_type) { + case FSNOTIFY_EVENT_FILE: { + struct file *file = data; + struct path *path = &file->f_path; + event->path.dentry = path->dentry; + event->path.mnt = path->mnt; + path_get(&event->path); + event->data_type = FSNOTIFY_EVENT_PATH; + break; + } + case FSNOTIFY_EVENT_PATH: { + struct path *path = data; + event->path.dentry = path->dentry; + event->path.mnt = path->mnt; + path_get(&event->path); + event->data_type = FSNOTIFY_EVENT_PATH; + break; + } + case FSNOTIFY_EVENT_INODE: + event->inode = data; + event->data_type = FSNOTIFY_EVENT_INODE; + break; + case FSNOTIFY_EVENT_NONE: + event->inode = NULL; + event->path.dentry = NULL; + event->path.mnt = NULL; + break; + default: + BUG(); + } + + event->mask = mask; + + return event; +} + +__init int fsnotify_notification_init(void) +{ + fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); + + return 0; +} +subsys_initcall(fsnotify_notification_init); + diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 00fbd5b245c..6c9ebefdac8 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -13,6 +13,7 @@ #include #include +#include #include /* @@ -34,6 +35,16 @@ static inline void fsnotify_d_move(struct dentry *entry) inotify_d_move(entry); } +/* + * fsnotify_link_count - inode's link count changed + */ +static inline void fsnotify_link_count(struct inode *inode) +{ + inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE); +} + /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ @@ -43,28 +54,47 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 cookie = inotify_get_cookie(); + __u32 old_dir_mask = 0; + __u32 new_dir_mask = 0; - if (old_dir == new_dir) + if (old_dir == new_dir) { inode_dir_notify(old_dir, DN_RENAME); - else { + old_dir_mask = FS_DN_RENAME; + } else { inode_dir_notify(old_dir, DN_DELETE); + old_dir_mask = FS_DELETE; inode_dir_notify(new_dir, DN_CREATE); + new_dir_mask = FS_CREATE; } - if (isdir) + if (isdir) { isdir = IN_ISDIR; + old_dir_mask |= FS_IN_ISDIR; + new_dir_mask |= FS_IN_ISDIR; + } + + old_dir_mask |= FS_MOVED_FROM; + new_dir_mask |= FS_MOVED_TO; + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, source); inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, source); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE); + if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(target); + + /* this is really a link_count change not a removal */ + fsnotify_link_count(target); } if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE); } audit_inode_child(new_name, moved, new_dir); } @@ -74,10 +104,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + __u32 mask = FS_DELETE; + if (isdir) - isdir = IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); } /* @@ -87,14 +119,8 @@ static inline void fsnotify_inoderemove(struct inode *inode) { inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); -} -/* - * fsnotify_link_count - inode's link count changed - */ -static inline void fsnotify_link_count(struct inode *inode) -{ - inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); } /* @@ -106,6 +132,8 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -120,6 +148,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct inode); fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); + + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE); } /* @@ -127,10 +157,14 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { + __u32 mask = (FS_CREATE | FS_IN_ISDIR); + struct inode *d_inode = dentry->d_inode; + inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, - dentry->d_name.name, dentry->d_inode); + inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -139,14 +173,16 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ACCESS; + __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -155,14 +191,16 @@ static inline void fsnotify_access(struct dentry *dentry) static inline void fsnotify_modify(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_MODIFY; + __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -171,13 +209,15 @@ static inline void fsnotify_modify(struct dentry *dentry) static inline void fsnotify_open(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_OPEN; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -189,13 +229,15 @@ static inline void fsnotify_close(struct file *file) struct inode *inode = dentry->d_inode; const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; - u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; + __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); } /* @@ -204,13 +246,15 @@ static inline void fsnotify_close(struct file *file) static inline void fsnotify_xattr(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ATTRIB; + __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -221,34 +265,34 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; int dn_mask = 0; - u32 in_mask = 0; + __u32 in_mask = 0; if (ia_valid & ATTR_UID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_GID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_SIZE) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } else if (ia_valid & ATTR_ATIME) { - in_mask |= IN_ACCESS; + in_mask |= FS_ACCESS; dn_mask |= DN_ACCESS; } else if (ia_valid & ATTR_MTIME) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } if (ia_valid & ATTR_MODE) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } @@ -256,14 +300,15 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) dnotify_parent(dentry, dn_mask); if (in_mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= IN_ISDIR; + in_mask |= FS_IN_ISDIR; inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); inotify_dentry_parent_queue_event(dentry, in_mask, 0, dentry->d_name.name); + fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); } } -#ifdef CONFIG_INOTIFY /* inotify helpers */ +#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */ /* * fsnotify_oldname_init - save off the old filename before we change it @@ -281,7 +326,7 @@ static inline void fsnotify_oldname_free(const char *old_name) kfree(old_name); } -#else /* CONFIG_INOTIFY */ +#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */ static inline const char *fsnotify_oldname_init(const char *name) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h new file mode 100644 index 00000000000..1a55718b38a --- /dev/null +++ b/include/linux/fsnotify_backend.h @@ -0,0 +1,177 @@ +/* + * Filesystem access notification for Linux + * + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + */ + +#ifndef __LINUX_FSNOTIFY_BACKEND_H +#define __LINUX_FSNOTIFY_BACKEND_H + +#ifdef __KERNEL__ + +#include /* struct inode */ +#include +#include /* struct path */ +#include +#include + +#include + +/* + * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily + * convert between them. dnotify only needs conversion at watch creation + * so no perf loss there. fanotify isn't defined yet, so it can use the + * wholes if it needs more events. + */ +#define FS_ACCESS 0x00000001 /* File was accessed */ +#define FS_MODIFY 0x00000002 /* File was modified */ +#define FS_ATTRIB 0x00000004 /* Metadata changed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_OPEN 0x00000020 /* File was opened */ +#define FS_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FS_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FS_CREATE 0x00000100 /* Subfile was created */ +#define FS_DELETE 0x00000200 /* Subfile was deleted */ +#define FS_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FS_MOVE_SELF 0x00000800 /* Self was moved */ + +#define FS_UNMOUNT 0x00002000 /* inode on umount fs */ +#define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FS_IN_IGNORED 0x00008000 /* last inotify event here */ + +#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ + +struct fsnotify_group; +struct fsnotify_event; + +/* + * Each group much define these ops. The fsnotify infrastructure will call + * these operations for each relevant group. + * + * handle_event - main call for a group to handle an fs event + * free_group_priv - called when a group refcnt hits 0 to clean up the private union + */ +struct fsnotify_ops { + int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); + void (*free_group_priv)(struct fsnotify_group *group); +}; + +/* + * A group is a "thing" that wants to receive notification about filesystem + * events. The mask holds the subset of event types this group cares about. + * refcnt on a group is up to the implementor and at any moment if it goes 0 + * everything will be cleaned up. + */ +struct fsnotify_group { + /* + * global list of all groups receiving events from fsnotify. + * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex + * or fsnotify_grp_srcu depending on write vs read. + */ + struct list_head group_list; + + /* + * Defines all of the event types in which this group is interested. + * This mask is a bitwise OR of the FS_* events from above. Each time + * this mask changes for a group (if it changes) the correct functions + * must be called to update the global structures which indicate global + * interest in event types. + */ + __u32 mask; + + /* + * How the refcnt is used is up to each group. When the refcnt hits 0 + * fsnotify will clean up all of the resources associated with this group. + * As an example, the dnotify group will always have a refcnt=1 and that + * will never change. Inotify, on the other hand, has a group per + * inotify_init() and the refcnt will hit 0 only when that fd has been + * closed. + */ + atomic_t refcnt; /* things with interest in this group */ + unsigned int group_num; /* simply prevents accidental group collision */ + + const struct fsnotify_ops *ops; /* how this group handles things */ + + /* prevents double list_del of group_list. protected by global fsnotify_gr_mutex */ + bool on_group_list; + + /* groups can define private fields here or use the void *private */ + union { + void *private; + }; +}; + +/* + * all of the information about the original object we want to now send to + * a group. If you want to carry more info from the accessing task to the + * listener this structure is where you need to be adding fields. + */ +struct fsnotify_event { + spinlock_t lock; /* protection for the associated event_holder and private_list */ + /* to_tell may ONLY be dereferenced during handle_event(). */ + struct inode *to_tell; /* either the inode the event happened to or its parent */ + /* + * depending on the event type we should have either a path or inode + * We hold a reference on path, but NOT on inode. Since we have the ref on + * the path, it may be dereferenced at any point during this object's + * lifetime. That reference is dropped when this object's refcnt hits + * 0. If this event contains an inode instead of a path, the inode may + * ONLY be used during handle_event(). + */ + union { + struct path path; + struct inode *inode; + }; +/* when calling fsnotify tell it if the data is a path or inode */ +#define FSNOTIFY_EVENT_NONE 0 +#define FSNOTIFY_EVENT_PATH 1 +#define FSNOTIFY_EVENT_INODE 2 +#define FSNOTIFY_EVENT_FILE 3 + int data_type; /* which of the above union we have */ + atomic_t refcnt; /* how many groups still are using/need to send this event */ + __u32 mask; /* the type of access, bitwise OR for FS_* event types */ +}; + +#ifdef CONFIG_FSNOTIFY + +/* called from the vfs helpers */ + +/* main fsnotify call to send events */ +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); + + +/* called from fsnotify listeners, such as fanotify or dnotify */ + +/* must call when a group changes its ->mask */ +extern void fsnotify_recalc_global_mask(void); +/* get a reference to an existing or create a new group */ +extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, + __u32 mask, + const struct fsnotify_ops *ops); +/* drop reference on a group from fsnotify_obtain_group */ +extern void fsnotify_put_group(struct fsnotify_group *group); + +/* take a reference to an event */ +extern void fsnotify_get_event(struct fsnotify_event *event); +extern void fsnotify_put_event(struct fsnotify_event *event); +/* find private data previously attached to an event */ +extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); + +/* put here because inotify does some weird stuff when destroying watches */ +extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_is); +#else + +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +{} +#endif /* CONFIG_FSNOTIFY */ + +#endif /* __KERNEL __ */ + +#endif /* __LINUX_FSNOTIFY_BACKEND_H */ -- cgit v1.2.3-70-g09d2 From 3be25f49b9d6a97eae9bcb96d3292072b7658bd8 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:26 -0400 Subject: fsnotify: add marks to inodes so groups can interpret how to handle those inodes This patch creates a way for fsnotify groups to attach marks to inodes. These marks have little meaning to the generic fsnotify infrastructure and thus their meaning should be interpreted by the group that attached them to the inode's list. dnotify and inotify will make use of these markings to indicate which inodes are of interest to their respective groups. But this implementation has the useful property that in the future other listeners could actually use the marks for the exact opposite reason, aka to indicate which inodes it had NO interest in. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/inode.c | 9 ++ fs/notify/Makefile | 2 +- fs/notify/fsnotify.c | 13 ++ fs/notify/fsnotify.h | 5 + fs/notify/group.c | 49 +++++- fs/notify/inode_mark.c | 329 +++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 5 + include/linux/fsnotify.h | 9 ++ include/linux/fsnotify_backend.h | 65 +++++++- 9 files changed, 483 insertions(+), 3 deletions(-) create mode 100644 fs/notify/inode_mark.c (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index bca0c618fdb..54c63ce3de2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_private = NULL; inode->i_mapping = mapping; +#ifdef CONFIG_FSNOTIFY + inode->i_fsnotify_mask = 0; +#endif + return inode; out_free_security: @@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode) BUG_ON(inode_has_buffers(inode)); ima_inode_free(inode); security_inode_free(inode); + fsnotify_inode_delete(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->inotify_watches); mutex_init(&inode->inotify_mutex); #endif +#ifdef CONFIG_FSNOTIFY + INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); +#endif } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/notify/Makefile b/fs/notify/Makefile index db5467b5b58..0922cc826c4 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 56bee0f10c3..d5654629c65 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -25,6 +25,15 @@ #include #include "fsnotify.h" +/* + * Clear all of the marks on an inode when it is being evicted from core + */ +void __fsnotify_inode_delete(struct inode *inode) +{ + fsnotify_clear_marks_by_inode(inode); +} +EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); + /* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call @@ -43,6 +52,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) if (!(mask & fsnotify_mask)) return; + if (!(mask & to_tell->i_fsnotify_mask)) + return; /* * SRCU!! the groups list is very very much read only and the path is * very hot. The VAST majority of events are not going to need to do @@ -51,6 +62,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) idx = srcu_read_lock(&fsnotify_grp_srcu); list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { if (mask & group->mask) { + if (!group->ops->should_send_event(group, to_tell, mask)) + continue; if (!event) { event = fsnotify_create_event(to_tell, mask, data, data_is); /* shit, we OOM'd and now we can't tell, maybe diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index c6a8bd47657..8ebcbe893c9 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,4 +12,9 @@ extern struct srcu_struct fsnotify_grp_srcu; extern struct list_head fsnotify_groups; /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ extern __u32 fsnotify_mask; + +/* final kfree of a group */ +extern void fsnotify_final_destroy_group(struct fsnotify_group *group); +/* run the list of all marks associated with inode and flag them to be freed */ +extern void fsnotify_clear_marks_by_inode(struct inode *inode); #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c index c6812953b96..a29d2fa6792 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -54,6 +54,29 @@ void fsnotify_recalc_global_mask(void) fsnotify_mask = mask; } +/* + * Update the group->mask by running all of the marks associated with this + * group and finding the bitwise | of all of the mark->mask. If we change + * the group->mask we need to update the global mask of events interesting + * to the system. + */ +void fsnotify_recalc_group_mask(struct fsnotify_group *group) +{ + __u32 mask = 0; + __u32 old_mask = group->mask; + struct fsnotify_mark_entry *entry; + + spin_lock(&group->mark_lock); + list_for_each_entry(entry, &group->mark_entries, g_list) + mask |= entry->mask; + spin_unlock(&group->mark_lock); + + group->mask = mask; + + if (old_mask != mask) + fsnotify_recalc_global_mask(); +} + /* * Take a reference to a group so things found under the fsnotify_grp_mutex * can't get freed under us @@ -66,7 +89,7 @@ static void fsnotify_get_group(struct fsnotify_group *group) /* * Final freeing of a group */ -static void fsnotify_destroy_group(struct fsnotify_group *group) +void fsnotify_final_destroy_group(struct fsnotify_group *group) { if (group->ops->free_group_priv) group->ops->free_group_priv(group); @@ -74,6 +97,24 @@ static void fsnotify_destroy_group(struct fsnotify_group *group) kfree(group); } +/* + * Trying to get rid of a group. We need to first get rid of any outstanding + * allocations and then free the group. Remember that fsnotify_clear_marks_by_group + * could miss marks that are being freed by inode and those marks could still + * hold a reference to this group (via group->num_marks) If we get into that + * situtation, the fsnotify_final_destroy_group will get called when that final + * mark is freed. + */ +static void fsnotify_destroy_group(struct fsnotify_group *group) +{ + /* clear all inode mark entries for this group */ + fsnotify_clear_marks_by_group(group); + + /* past the point of no return, matches the initial value of 1 */ + if (atomic_dec_and_test(&group->num_marks)) + fsnotify_final_destroy_group(group); +} + /* * Remove this group from the global list of groups that will get events * this can be done even if there are still references and things still using @@ -173,6 +214,10 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, group->group_num = group_num; group->mask = mask; + spin_lock_init(&group->mark_lock); + atomic_set(&group->num_marks, 0); + INIT_LIST_HEAD(&group->mark_entries); + group->ops = ops; mutex_lock(&fsnotify_grp_mutex); @@ -188,6 +233,8 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, /* group not found, add a new one */ list_add_rcu(&group->group_list, &fsnotify_groups); group->on_group_list = 1; + /* being on the fsnotify_groups list holds one num_marks */ + atomic_inc(&group->num_marks); mutex_unlock(&fsnotify_grp_mutex); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c new file mode 100644 index 00000000000..cdc15414697 --- /dev/null +++ b/fs/notify/inode_mark.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * fsnotify inode mark locking/lifetime/and refcnting + * + * REFCNT: + * The mark->refcnt tells how many "things" in the kernel currently are + * referencing this object. The object typically will live inside the kernel + * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task + * which can find this object holding the appropriete locks, can take a reference + * and the object itself is guarenteed to survive until the reference is dropped. + * + * LOCKING: + * There are 3 spinlocks involved with fsnotify inode marks and they MUST + * be taken in order as follows: + * + * entry->lock + * group->mark_lock + * inode->i_lock + * + * entry->lock protects 2 things, entry->group and entry->inode. You must hold + * that lock to dereference either of these things (they could be NULL even with + * the lock) + * + * group->mark_lock protects the mark_entries list anchored inside a given group + * and each entry is hooked via the g_list. It also sorta protects the + * free_g_list, which when used is anchored by a private list on the stack of the + * task which held the group->mark_lock. + * + * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a + * given inode and each entry is hooked via the i_list. (and sorta the + * free_i_list) + * + * + * LIFETIME: + * Inode marks survive between when they are added to an inode and when their + * refcnt==0. + * + * The inode mark can be cleared for a number of different reasons including: + * - The inode is unlinked for the last time. (fsnotify_inode_remove) + * - The inode is being evicted from cache. (fsnotify_inode_delete) + * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) + * - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry) + * - The fsnotify_group associated with the mark is going away and all such marks + * need to be cleaned up. (fsnotify_clear_marks_by_group) + * + * Worst case we are given an inode and need to clean up all the marks on that + * inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each + * mark on the list we take a reference (so the mark can't disappear under us). + * We remove that mark form the inode's list of marks and we add this mark to a + * private list anchored on the stack using i_free_list; At this point we no + * longer fear anything finding the mark using the inode's list of marks. + * + * We can safely and locklessly run the private list on the stack of everything + * we just unattached from the original inode. For each mark on the private list + * we grab the mark-> and can thus dereference mark->group and mark->inode. If + * we see the group and inode are not NULL we take those locks. Now holding all + * 3 locks we can completely remove the mark from other tasks finding it in the + * future. Remember, 10 things might already be referencing this mark, but they + * better be holding a ref. We drop our reference we took before we unhooked it + * from the inode. When the ref hits 0 we can free the mark. + * + * Very similarly for freeing by group, except we use free_g_list. + * + * This has the very interesting property of being able to run concurrently with + * any (or all) other directions. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "fsnotify.h" + +void fsnotify_get_mark(struct fsnotify_mark_entry *entry) +{ + atomic_inc(&entry->refcnt); +} + +void fsnotify_put_mark(struct fsnotify_mark_entry *entry) +{ + if (atomic_dec_and_test(&entry->refcnt)) + entry->free_mark(entry); +} + +/* + * Recalculate the mask of events relevant to a given inode locked. + */ +static void fsnotify_recalc_inode_mask_locked(struct inode *inode) +{ + struct fsnotify_mark_entry *entry; + struct hlist_node *pos; + __u32 new_mask = 0; + + assert_spin_locked(&inode->i_lock); + + hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) + new_mask |= entry->mask; + inode->i_fsnotify_mask = new_mask; +} + +/* + * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types + * any notifier is interested in hearing for this inode. + */ +void fsnotify_recalc_inode_mask(struct inode *inode) +{ + spin_lock(&inode->i_lock); + fsnotify_recalc_inode_mask_locked(inode); + spin_unlock(&inode->i_lock); +} + +/* + * Any time a mark is getting freed we end up here. + * The caller had better be holding a reference to this mark so we don't actually + * do the final put under the entry->lock + */ +void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry) +{ + struct fsnotify_group *group; + struct inode *inode; + + spin_lock(&entry->lock); + + group = entry->group; + inode = entry->inode; + + BUG_ON(group && !inode); + BUG_ON(!group && inode); + + /* if !group something else already marked this to die */ + if (!group) { + spin_unlock(&entry->lock); + return; + } + + /* 1 from caller and 1 for being on i_list/g_list */ + BUG_ON(atomic_read(&entry->refcnt) < 2); + + spin_lock(&group->mark_lock); + spin_lock(&inode->i_lock); + + hlist_del_init(&entry->i_list); + entry->inode = NULL; + + list_del_init(&entry->g_list); + entry->group = NULL; + + fsnotify_put_mark(entry); /* for i_list and g_list */ + + /* + * this mark is now off the inode->i_fsnotify_mark_entries list and we + * hold the inode->i_lock, so this is the perfect time to update the + * inode->i_fsnotify_mask + */ + fsnotify_recalc_inode_mask_locked(inode); + + spin_unlock(&inode->i_lock); + spin_unlock(&group->mark_lock); + spin_unlock(&entry->lock); + + /* + * Some groups like to know that marks are being freed. This is a + * callback to the group function to let it know that this entry + * is being freed. + */ + group->ops->freeing_mark(entry, group); + + /* + * it's possible that this group tried to destroy itself, but this + * this mark was simultaneously being freed by inode. If that's the + * case, we finish freeing the group here. + */ + if (unlikely(atomic_dec_and_test(&group->num_marks))) + fsnotify_final_destroy_group(group); +} + +/* + * Given a group, destroy all of the marks associated with that group. + */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group) +{ + struct fsnotify_mark_entry *lentry, *entry; + LIST_HEAD(free_list); + + spin_lock(&group->mark_lock); + list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) { + list_add(&entry->free_g_list, &free_list); + list_del_init(&entry->g_list); + fsnotify_get_mark(entry); + } + spin_unlock(&group->mark_lock); + + list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) { + fsnotify_destroy_mark_by_entry(entry); + fsnotify_put_mark(entry); + } +} + +/* + * Given an inode, destroy all of the marks associated with that inode. + */ +void fsnotify_clear_marks_by_inode(struct inode *inode) +{ + struct fsnotify_mark_entry *entry, *lentry; + struct hlist_node *pos, *n; + LIST_HEAD(free_list); + + spin_lock(&inode->i_lock); + hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) { + list_add(&entry->free_i_list, &free_list); + hlist_del_init(&entry->i_list); + fsnotify_get_mark(entry); + } + spin_unlock(&inode->i_lock); + + list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) { + fsnotify_destroy_mark_by_entry(entry); + fsnotify_put_mark(entry); + } +} + +/* + * given a group and inode, find the mark associated with that combination. + * if found take a reference to that mark and return it, else return NULL + */ +struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, + struct inode *inode) +{ + struct fsnotify_mark_entry *entry; + struct hlist_node *pos; + + assert_spin_locked(&inode->i_lock); + + hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) { + if (entry->group == group) { + fsnotify_get_mark(entry); + return entry; + } + } + return NULL; +} + +/* + * Nothing fancy, just initialize lists and locks and counters. + */ +void fsnotify_init_mark(struct fsnotify_mark_entry *entry, + void (*free_mark)(struct fsnotify_mark_entry *entry)) + +{ + spin_lock_init(&entry->lock); + atomic_set(&entry->refcnt, 1); + INIT_HLIST_NODE(&entry->i_list); + entry->group = NULL; + entry->mask = 0; + entry->inode = NULL; + entry->free_mark = free_mark; +} + +/* + * Attach an initialized mark entry to a given group and inode. + * These marks may be used for the fsnotify backend to determine which + * event types should be delivered to which group and for which inodes. + */ +int fsnotify_add_mark(struct fsnotify_mark_entry *entry, + struct fsnotify_group *group, struct inode *inode) +{ + struct fsnotify_mark_entry *lentry; + int ret = 0; + + /* + * LOCKING ORDER!!!! + * entry->lock + * group->mark_lock + * inode->i_lock + */ + spin_lock(&entry->lock); + spin_lock(&group->mark_lock); + spin_lock(&inode->i_lock); + + entry->group = group; + entry->inode = inode; + + lentry = fsnotify_find_mark_entry(group, inode); + if (!lentry) { + hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries); + list_add(&entry->g_list, &group->mark_entries); + + fsnotify_get_mark(entry); /* for i_list and g_list */ + + atomic_inc(&group->num_marks); + + fsnotify_recalc_inode_mask_locked(inode); + } + + spin_unlock(&inode->i_lock); + spin_unlock(&group->mark_lock); + spin_unlock(&entry->lock); + + if (lentry) { + ret = -EEXIST; + fsnotify_put_mark(lentry); + } + + return ret; +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 83d6b439724..275b0860044 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -755,6 +755,11 @@ struct inode { __u32 i_generation; +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ +#endif + #ifdef CONFIG_DNOTIFY unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6c9ebefdac8..3856eb6e597 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -99,6 +99,14 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, audit_inode_child(new_name, moved, new_dir); } +/* + * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed + */ +static inline void fsnotify_inode_delete(struct inode *inode) +{ + __fsnotify_inode_delete(inode); +} + /* * fsnotify_nameremove - a filename was removed from a directory */ @@ -121,6 +129,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_is_dead(inode); fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); + __fsnotify_inode_delete(inode); } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1a55718b38a..cad5c4d75c1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -48,17 +48,25 @@ struct fsnotify_group; struct fsnotify_event; +struct fsnotify_mark_entry; /* * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * + * should_send_event - given a group, inode, and mask this function determines + * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union + * freeing-mark - this means that a mark has been flagged to die when everything + * finishes using it. The function is supplied with what must be a + * valid group and inode to use to clean up. */ struct fsnotify_ops { + bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask); int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); void (*free_group_priv)(struct fsnotify_group *group); + void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); }; /* @@ -97,7 +105,14 @@ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ - /* prevents double list_del of group_list. protected by global fsnotify_gr_mutex */ + /* stores all fastapth entries assoc with this group so they can be cleaned on unregister */ + spinlock_t mark_lock; /* protect mark_entries list */ + atomic_t num_marks; /* 1 for each mark entry and 1 for not being + * past the point of no return when freeing + * a group */ + struct list_head mark_entries; /* all inode mark entries for this group */ + + /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */ bool on_group_list; /* groups can define private fields here or use the void *private */ @@ -137,12 +152,38 @@ struct fsnotify_event { __u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; +/* + * a mark is simply an entry attached to an in core inode which allows an + * fsnotify listener to indicate they are either no longer interested in events + * of a type matching mask or only interested in those events. + * + * these are flushed when an inode is evicted from core and may be flushed + * when the inode is modified (as seen by fsnotify_access). Some fsnotify users + * (such as dnotify) will flush these when the open fd is closed and not at + * inode eviction or modification. + */ +struct fsnotify_mark_entry { + __u32 mask; /* mask this mark entry is for */ + /* we hold ref for each i_list and g_list. also one ref for each 'thing' + * in kernel that found and may be using this mark. */ + atomic_t refcnt; /* active things looking at this mark */ + struct inode *inode; /* inode this entry is associated with */ + struct fsnotify_group *group; /* group this mark entry is for */ + struct hlist_node i_list; /* list of mark_entries by inode->i_fsnotify_mark_entries */ + struct list_head g_list; /* list of mark_entries by group->i_fsnotify_mark_entries */ + spinlock_t lock; /* protect group, inode, and killme */ + struct list_head free_i_list; /* tmp list used when freeing this mark */ + struct list_head free_g_list; /* tmp list used when freeing this mark */ + void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */ +}; + #ifdef CONFIG_FSNOTIFY /* called from the vfs helpers */ /* main fsnotify call to send events */ extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void __fsnotify_inode_delete(struct inode *inode); /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -153,6 +194,8 @@ extern void fsnotify_recalc_global_mask(void); extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, const struct fsnotify_ops *ops); +/* run all marks associated with this group and update group->mask */ +extern void fsnotify_recalc_group_mask(struct fsnotify_group *group); /* drop reference on a group from fsnotify_obtain_group */ extern void fsnotify_put_group(struct fsnotify_group *group); @@ -163,6 +206,22 @@ extern void fsnotify_put_event(struct fsnotify_event *event); extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* functions used to manipulate the marks attached to inodes */ + +/* run all marks associated with an inode and update inode->i_fsnotify_mask */ +extern void fsnotify_recalc_inode_mask(struct inode *inode); +extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry)); +/* find (and take a reference) to a mark associated with group and inode */ +extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode); +/* attach the mark to both the group and the inode */ +extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode); +/* given a mark, flag it to be freed when all references are dropped */ +extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); +/* run all the marks in a group, and flag them to be freed */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); +extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); + /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, int data_is); @@ -170,6 +229,10 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) {} + +static inline void __fsnotify_inode_delete(struct inode *inode) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3-70-g09d2 From c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:29 -0400 Subject: fsnotify: parent event notification inotify and dnotify both use a similar parent notification mechanism. We add a generic parent notification mechanism to fsnotify for both of these to use. This new machanism also adds the dentry flag optimization which exists for inotify to dnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/fsnotify.c | 91 ++++++++++++++++++++++++++++++++++++++++ fs/notify/fsnotify.h | 5 +++ fs/notify/inode_mark.c | 17 ++++++++ include/linux/dcache.h | 4 +- include/linux/fsnotify.h | 34 +++++++++++---- include/linux/fsnotify_backend.h | 64 ++++++++++++++++++++++++++++ 6 files changed, 205 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d5654629c65..7fc760067a6 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -34,6 +34,97 @@ void __fsnotify_inode_delete(struct inode *inode) } EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); +/* + * Given an inode, first check if we care what happens to our children. Inotify + * and dnotify both tell their parents about events. If we care about any event + * on a child we run all of our children and set a dentry flag saying that the + * parent cares. Thus when an event happens on a child it can quickly tell if + * if there is a need to find a parent and send the event to the parent. + */ +void __fsnotify_update_child_dentry_flags(struct inode *inode) +{ + struct dentry *alias; + int watched; + + if (!S_ISDIR(inode->i_mode)) + return; + + /* determine if the children should tell inode about their events */ + watched = fsnotify_inode_watches_children(inode); + + spin_lock(&dcache_lock); + /* run all of the dentries associated with this inode. Since this is a + * directory, there damn well better only be one item on this list */ + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + struct dentry *child; + + /* run all of the children of the original inode and fix their + * d_flags to indicate parental interest (their parent is the + * original inode) */ + list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + if (!child->d_inode) + continue; + + spin_lock(&child->d_lock); + if (watched) + child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; + else + child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; + spin_unlock(&child->d_lock); + } + } + spin_unlock(&dcache_lock); +} + +/* Notify this dentry's parent about a child's events. */ +void __fsnotify_parent(struct dentry *dentry, __u32 mask) +{ + struct dentry *parent; + struct inode *p_inode; + bool send = false; + bool should_update_children = false; + + if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) + return; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + p_inode = parent->d_inode; + + if (fsnotify_inode_watches_children(p_inode)) { + if (p_inode->i_fsnotify_mask & mask) { + dget(parent); + send = true; + } + } else { + /* + * The parent doesn't care about events on it's children but + * at least one child thought it did. We need to run all the + * children and update their d_flags to let them know p_inode + * doesn't care about them any more. + */ + dget(parent); + should_update_children = true; + } + + spin_unlock(&dentry->d_lock); + + if (send) { + /* we are notifying a parent so come up with the new mask which + * specifies these are events which came from a child. */ + mask |= FS_EVENT_ON_CHILD; + + fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE); + dput(parent); + } + + if (unlikely(should_update_children)) { + __fsnotify_update_child_dentry_flags(p_inode); + dput(parent); + } +} +EXPORT_SYMBOL_GPL(__fsnotify_parent); + /* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 8ebcbe893c9..83b8ec0a8ec 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -17,4 +17,9 @@ extern __u32 fsnotify_mask; extern void fsnotify_final_destroy_group(struct fsnotify_group *group); /* run the list of all marks associated with inode and flag them to be freed */ extern void fsnotify_clear_marks_by_inode(struct inode *inode); +/* + * update the dentry->d_flags of all of inode's children to indicate if inode cares + * about events that happen to its children. + */ +extern void __fsnotify_update_child_dentry_flags(struct inode *inode); #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index cdc15414697..a39534845b2 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -131,6 +131,8 @@ void fsnotify_recalc_inode_mask(struct inode *inode) spin_lock(&inode->i_lock); fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); + + __fsnotify_update_child_dentry_flags(inode); } /* @@ -189,6 +191,19 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry) */ group->ops->freeing_mark(entry, group); + /* + * __fsnotify_update_child_dentry_flags(inode); + * + * I really want to call that, but we can't, we have no idea if the inode + * still exists the second we drop the entry->lock. + * + * The next time an event arrive to this inode from one of it's children + * __fsnotify_parent will see that the inode doesn't care about it's + * children and will update all of these flags then. So really this + * is just a lazy update (and could be a perf win...) + */ + + /* * it's possible that this group tried to destroy itself, but this * this mark was simultaneously being freed by inode. If that's the @@ -323,6 +338,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry, if (lentry) { ret = -EEXIST; fsnotify_put_mark(lentry); + } else { + __fsnotify_update_child_dentry_flags(inode); } return ret; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 15156364d19..97978004338 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,10 +180,12 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3856eb6e597..6a662ed0bc8 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -23,15 +23,31 @@ static inline void fsnotify_d_instantiate(struct dentry *entry, struct inode *inode) { + __fsnotify_d_instantiate(entry, inode); + inotify_d_instantiate(entry, inode); } +/* Notify this dentry's parent about a child's events. */ +static inline void fsnotify_parent(struct dentry *dentry, __u32 mask) +{ + __fsnotify_parent(dentry, mask); + + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); +} + /* * fsnotify_d_move - entry has been moved * Called with dcache_lock and entry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *entry) { + /* + * On move we need to update entry->d_flags to indicate if the new parent + * cares about events from this entry. + */ + __fsnotify_update_dcache_flags(entry); + inotify_d_move(entry); } @@ -117,7 +133,8 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) if (isdir) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); + + fsnotify_parent(dentry, mask); } /* @@ -188,9 +205,9 @@ static inline void fsnotify_access(struct dentry *dentry) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_ACCESS); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -206,9 +223,9 @@ static inline void fsnotify_modify(struct dentry *dentry) mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_MODIFY); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -223,9 +240,9 @@ static inline void fsnotify_open(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -236,16 +253,15 @@ static inline void fsnotify_close(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); } @@ -260,9 +276,9 @@ static inline void fsnotify_xattr(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + fsnotify_parent(dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } @@ -311,8 +327,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (S_ISDIR(inode->i_mode)) in_mask |= FS_IN_ISDIR; inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); - inotify_dentry_parent_queue_event(dentry, in_mask, 0, - dentry->d_name.name); + + fsnotify_parent(dentry, in_mask); fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index cad5c4d75c1..13d2dd57004 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -46,6 +46,17 @@ #define FS_DN_RENAME 0x10000000 /* file renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ +/* This inode cares about things that happen to its children. Always set for + * dnotify and inotify. */ +#define FS_EVENT_ON_CHILD 0x08000000 + +/* This is a list of all events that may get sent to a parernt based on fs event + * happening to inodes inside that directory */ +#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ + FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ + FS_DELETE) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; @@ -183,8 +194,52 @@ struct fsnotify_mark_entry { /* main fsnotify call to send events */ extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); +static inline int fsnotify_inode_watches_children(struct inode *inode) +{ + /* FS_EVENT_ON_CHILD is set if the inode may care */ + if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) + return 0; + /* this inode might care about child events, does it care about the + * specific set of events that can happen on a child? */ + return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; +} + +/* + * Update the dentry with a flag indicating the interest of its parent to receive + * filesystem events when those events happens to this dentry->d_inode. + */ +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{ + struct dentry *parent; + + assert_spin_locked(&dcache_lock); + assert_spin_locked(&dentry->d_lock); + + parent = dentry->d_parent; + if (fsnotify_inode_watches_children(parent->d_inode)) + dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; + else + dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; +} + +/* + * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. + */ +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{ + if (!inode) + return; + + assert_spin_locked(&dcache_lock); + + spin_lock(&dentry->d_lock); + __fsnotify_update_dcache_flags(dentry); + spin_unlock(&dentry->d_lock); +} /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -230,9 +285,18 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) {} +static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) +{} + static inline void __fsnotify_inode_delete(struct inode *inode) {} +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{} + +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3-70-g09d2 From 3c5119c05d624f95f4967d16b38c9624b816bdb9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:33 -0400 Subject: dnotify: reimplement dnotify using fsnotify Reimplement dnotify using fsnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- MAINTAINERS | 6 +- fs/notify/dnotify/Kconfig | 1 + fs/notify/dnotify/dnotify.c | 469 ++++++++++++++++++++++++++++++--------- include/linux/dnotify.h | 29 +-- include/linux/fs.h | 5 - include/linux/fsnotify.h | 68 ++---- include/linux/fsnotify_backend.h | 3 + 7 files changed, 398 insertions(+), 183 deletions(-) (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index ccdb57524e3..96e0c8c6079 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1802,10 +1802,10 @@ F: drivers/char/epca* F: drivers/char/digi* DIRECTORY NOTIFICATION (DNOTIFY) -P: Stephen Rothwell -M: sfr@canb.auug.org.au +P: Eric Paris +M: eparis@parisplace.org L: linux-kernel@vger.kernel.org -S: Supported +S: Maintained F: Documentation/filesystems/dnotify.txt F: fs/notify/dnotify/ F: include/linux/dnotify.h diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig index 26adf5dfa64..904ff8d5405 100644 --- a/fs/notify/dnotify/Kconfig +++ b/fs/notify/dnotify/Kconfig @@ -1,5 +1,6 @@ config DNOTIFY bool "Dnotify support" + depends on FSNOTIFY default y help Dnotify is a directory-based per-fd file change notification system diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index b0aa2cde80b..d9d80f502c6 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -3,6 +3,9 @@ * * Copyright (C) 2000,2001,2002 Stephen Rothwell * + * Copyright (C) 2009 Eric Paris + * dnotify was largly rewritten to use the new fsnotify infrastructure + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2, or (at your option) any @@ -21,24 +24,178 @@ #include #include #include +#include int dir_notify_enable __read_mostly = 1; -static struct kmem_cache *dn_cache __read_mostly; +static struct kmem_cache *dnotify_struct_cache __read_mostly; +static struct kmem_cache *dnotify_mark_entry_cache __read_mostly; +static struct fsnotify_group *dnotify_group __read_mostly; +static DEFINE_MUTEX(dnotify_mark_mutex); + +/* + * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which + * is being watched by dnotify. If multiple userspace applications are watching + * the same directory with dnotify their information is chained in dn + */ +struct dnotify_mark_entry { + struct fsnotify_mark_entry fsn_entry; + struct dnotify_struct *dn; +}; -static void redo_inode_mask(struct inode *inode) +/* + * When a process starts or stops watching an inode the set of events which + * dnotify cares about for that inode may change. This function runs the + * list of everything receiving dnotify events about this directory and calculates + * the set of all those events. After it updates what dnotify is interested in + * it calls the fsnotify function so it can update the set of all events relevant + * to this inode. + */ +static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry) { - unsigned long new_mask; + __u32 new_mask, old_mask; struct dnotify_struct *dn; + struct dnotify_mark_entry *dnentry = container_of(entry, + struct dnotify_mark_entry, + fsn_entry); + + assert_spin_locked(&entry->lock); + old_mask = entry->mask; new_mask = 0; - for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next) - new_mask |= dn->dn_mask & ~DN_MULTISHOT; - inode->i_dnotify_mask = new_mask; + for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next) + new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); + entry->mask = new_mask; + + if (old_mask == new_mask) + return; + + if (entry->inode) + fsnotify_recalc_inode_mask(entry->inode); } +/* + * Mains fsnotify call where events are delivered to dnotify. + * Find the dnotify mark on the relevant inode, run the list of dnotify structs + * on that mark and determine which of them has expressed interest in receiving + * events of this type. When found send the correct process and signal and + * destroy the dnotify struct if it was not registered to receive multiple + * events. + */ +static int dnotify_handle_event(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + struct fsnotify_mark_entry *entry = NULL; + struct dnotify_mark_entry *dnentry; + struct inode *to_tell; + struct dnotify_struct *dn; + struct dnotify_struct **prev; + struct fown_struct *fown; + + to_tell = event->to_tell; + + spin_lock(&to_tell->i_lock); + entry = fsnotify_find_mark_entry(group, to_tell); + spin_unlock(&to_tell->i_lock); + + /* unlikely since we alreay passed dnotify_should_send_event() */ + if (unlikely(!entry)) + return 0; + dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); + + spin_lock(&entry->lock); + prev = &dnentry->dn; + while ((dn = *prev) != NULL) { + if ((dn->dn_mask & event->mask) == 0) { + prev = &dn->dn_next; + continue; + } + fown = &dn->dn_filp->f_owner; + send_sigio(fown, dn->dn_fd, POLL_MSG); + if (dn->dn_mask & FS_DN_MULTISHOT) + prev = &dn->dn_next; + else { + *prev = dn->dn_next; + kmem_cache_free(dnotify_struct_cache, dn); + dnotify_recalc_inode_mask(entry); + } + } + + spin_unlock(&entry->lock); + fsnotify_put_mark(entry); + + return 0; +} + +/* + * Given an inode and mask determine if dnotify would be interested in sending + * userspace notification for that pair. + */ +static bool dnotify_should_send_event(struct fsnotify_group *group, + struct inode *inode, __u32 mask) +{ + struct fsnotify_mark_entry *entry; + bool send; + + /* !dir_notify_enable should never get here, don't waste time checking + if (!dir_notify_enable) + return 0; */ + + /* not a dir, dnotify doesn't care */ + if (!S_ISDIR(inode->i_mode)) + return false; + + spin_lock(&inode->i_lock); + entry = fsnotify_find_mark_entry(group, inode); + spin_unlock(&inode->i_lock); + + /* no mark means no dnotify watch */ + if (!entry) + return false; + + spin_lock(&entry->lock); + send = (mask & entry->mask) ? true : false; + spin_unlock(&entry->lock); + fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */ + + return send; +} + +static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry, + struct fsnotify_group *group) +{ + /* dnotify doesn't care than an inode is on the way out */ +} + +static void dnotify_free_mark(struct fsnotify_mark_entry *entry) +{ + struct dnotify_mark_entry *dnentry = container_of(entry, + struct dnotify_mark_entry, + fsn_entry); + + BUG_ON(dnentry->dn); + + kmem_cache_free(dnotify_mark_entry_cache, dnentry); +} + +static struct fsnotify_ops dnotify_fsnotify_ops = { + .handle_event = dnotify_handle_event, + .should_send_event = dnotify_should_send_event, + .free_group_priv = NULL, + .freeing_mark = dnotify_freeing_mark, +}; + +/* + * Called every time a file is closed. Looks first for a dnotify mark on the + * inode. If one is found run all of the ->dn entries attached to that + * mark for one relevant to this process closing the file and remove that + * dnotify_struct. If that was the last dnotify_struct also remove the + * fsnotify_mark_entry. + */ void dnotify_flush(struct file *filp, fl_owner_t id) { + struct fsnotify_mark_entry *entry; + struct dnotify_mark_entry *dnentry; struct dnotify_struct *dn; struct dnotify_struct **prev; struct inode *inode; @@ -46,145 +203,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id) inode = filp->f_path.dentry->d_inode; if (!S_ISDIR(inode->i_mode)) return; + spin_lock(&inode->i_lock); - prev = &inode->i_dnotify; + entry = fsnotify_find_mark_entry(dnotify_group, inode); + spin_unlock(&inode->i_lock); + if (!entry) + return; + dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); + + mutex_lock(&dnotify_mark_mutex); + + spin_lock(&entry->lock); + prev = &dnentry->dn; while ((dn = *prev) != NULL) { if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { *prev = dn->dn_next; - redo_inode_mask(inode); - kmem_cache_free(dn_cache, dn); + kmem_cache_free(dnotify_struct_cache, dn); + dnotify_recalc_inode_mask(entry); break; } prev = &dn->dn_next; } - spin_unlock(&inode->i_lock); + + spin_unlock(&entry->lock); + + /* nothing else could have found us thanks to the dnotify_mark_mutex */ + if (dnentry->dn == NULL) + fsnotify_destroy_mark_by_entry(entry); + + fsnotify_recalc_group_mask(dnotify_group); + + mutex_unlock(&dnotify_mark_mutex); + + fsnotify_put_mark(entry); +} + +/* this conversion is done only at watch creation */ +static __u32 convert_arg(unsigned long arg) +{ + __u32 new_mask = FS_EVENT_ON_CHILD; + + if (arg & DN_MULTISHOT) + new_mask |= FS_DN_MULTISHOT; + if (arg & DN_DELETE) + new_mask |= (FS_DELETE | FS_MOVED_FROM); + if (arg & DN_MODIFY) + new_mask |= FS_MODIFY; + if (arg & DN_ACCESS) + new_mask |= FS_ACCESS; + if (arg & DN_ATTRIB) + new_mask |= FS_ATTRIB; + if (arg & DN_RENAME) + new_mask |= FS_DN_RENAME; + if (arg & DN_CREATE) + new_mask |= (FS_CREATE | FS_MOVED_TO); + + return new_mask; } +/* + * If multiple processes watch the same inode with dnotify there is only one + * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct + * onto that mark. This function either attaches the new dnotify_struct onto + * that list, or it |= the mask onto an existing dnofiy_struct. + */ +static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry, + fl_owner_t id, int fd, struct file *filp, __u32 mask) +{ + struct dnotify_struct *odn; + + odn = dnentry->dn; + while (odn != NULL) { + /* adding more events to existing dnofiy_struct? */ + if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { + odn->dn_fd = fd; + odn->dn_mask |= mask; + return -EEXIST; + } + odn = odn->dn_next; + } + + dn->dn_mask = mask; + dn->dn_fd = fd; + dn->dn_filp = filp; + dn->dn_owner = id; + dn->dn_next = dnentry->dn; + dnentry->dn = dn; + + return 0; +} + +/* + * When a process calls fcntl to attach a dnotify watch to a directory it ends + * up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be + * attached to the fsnotify_mark. + */ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) { + struct dnotify_mark_entry *new_dnentry, *dnentry; + struct fsnotify_mark_entry *new_entry, *entry; struct dnotify_struct *dn; - struct dnotify_struct *odn; - struct dnotify_struct **prev; struct inode *inode; fl_owner_t id = current->files; struct file *f; - int error = 0; + int destroy = 0, error = 0; + __u32 mask; + + /* we use these to tell if we need to kfree */ + new_entry = NULL; + dn = NULL; + if (!dir_notify_enable) { + error = -EINVAL; + goto out_err; + } + + /* a 0 mask means we are explicitly removing the watch */ if ((arg & ~DN_MULTISHOT) == 0) { dnotify_flush(filp, id); - return 0; + error = 0; + goto out_err; } - if (!dir_notify_enable) - return -EINVAL; + + /* dnotify only works on directories */ inode = filp->f_path.dentry->d_inode; - if (!S_ISDIR(inode->i_mode)) - return -ENOTDIR; - dn = kmem_cache_alloc(dn_cache, GFP_KERNEL); - if (dn == NULL) - return -ENOMEM; - spin_lock(&inode->i_lock); - prev = &inode->i_dnotify; - while ((odn = *prev) != NULL) { - if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { - odn->dn_fd = fd; - odn->dn_mask |= arg; - inode->i_dnotify_mask |= arg & ~DN_MULTISHOT; - goto out_free; - } - prev = &odn->dn_next; + if (!S_ISDIR(inode->i_mode)) { + error = -ENOTDIR; + goto out_err; } - rcu_read_lock(); - f = fcheck(fd); - rcu_read_unlock(); - /* we'd lost the race with close(), sod off silently */ - /* note that inode->i_lock prevents reordering problems - * between accesses to descriptor table and ->i_dnotify */ - if (f != filp) - goto out_free; + /* expect most fcntl to add new rather than augment old */ + dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL); + if (!dn) { + error = -ENOMEM; + goto out_err; + } - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - if (error) - goto out_free; + /* new fsnotify mark, we expect most fcntl calls to add a new mark */ + new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL); + if (!new_dnentry) { + error = -ENOMEM; + goto out_err; + } - dn->dn_mask = arg; - dn->dn_fd = fd; - dn->dn_filp = filp; - dn->dn_owner = id; - inode->i_dnotify_mask |= arg & ~DN_MULTISHOT; - dn->dn_next = inode->i_dnotify; - inode->i_dnotify = dn; - spin_unlock(&inode->i_lock); - return 0; + /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */ + mask = convert_arg(arg); -out_free: - spin_unlock(&inode->i_lock); - kmem_cache_free(dn_cache, dn); - return error; -} + /* set up the new_entry and new_dnentry */ + new_entry = &new_dnentry->fsn_entry; + fsnotify_init_mark(new_entry, dnotify_free_mark); + new_entry->mask = mask; + new_dnentry->dn = NULL; -void __inode_dir_notify(struct inode *inode, unsigned long event) -{ - struct dnotify_struct * dn; - struct dnotify_struct **prev; - struct fown_struct * fown; - int changed = 0; + /* this is needed to prevent the fcntl/close race described below */ + mutex_lock(&dnotify_mark_mutex); + /* add the new_entry or find an old one. */ spin_lock(&inode->i_lock); - prev = &inode->i_dnotify; - while ((dn = *prev) != NULL) { - if ((dn->dn_mask & event) == 0) { - prev = &dn->dn_next; - continue; - } - fown = &dn->dn_filp->f_owner; - send_sigio(fown, dn->dn_fd, POLL_MSG); - if (dn->dn_mask & DN_MULTISHOT) - prev = &dn->dn_next; - else { - *prev = dn->dn_next; - changed = 1; - kmem_cache_free(dn_cache, dn); - } - } - if (changed) - redo_inode_mask(inode); + entry = fsnotify_find_mark_entry(dnotify_group, inode); spin_unlock(&inode->i_lock); -} - -EXPORT_SYMBOL(__inode_dir_notify); + if (entry) { + dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); + spin_lock(&entry->lock); + } else { + fsnotify_add_mark(new_entry, dnotify_group, inode); + spin_lock(&new_entry->lock); + entry = new_entry; + dnentry = new_dnentry; + /* we used new_entry, so don't free it */ + new_entry = NULL; + } -/* - * This is hopelessly wrong, but unfixable without API changes. At - * least it doesn't oops the kernel... - * - * To safely access ->d_parent we need to keep d_move away from it. Use the - * dentry's d_lock for this. - */ -void dnotify_parent(struct dentry *dentry, unsigned long event) -{ - struct dentry *parent; + rcu_read_lock(); + f = fcheck(fd); + rcu_read_unlock(); - if (!dir_notify_enable) - return; + /* if (f != filp) means that we lost a race and another task/thread + * actually closed the fd we are still playing with before we grabbed + * the dnotify_mark_mutex and entry->lock. Since closing the fd is the + * only time we clean up the mark entries we need to get our mark off + * the list. */ + if (f != filp) { + /* if we added ourselves, shoot ourselves, it's possible that + * the flush actually did shoot this entry. That's fine too + * since multiple calls to destroy_mark is perfectly safe, if + * we found a dnentry already attached to the inode, just sod + * off silently as the flush at close time dealt with it. + */ + if (dnentry == new_dnentry) + destroy = 1; + goto out; + } - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; - if (parent->d_inode->i_dnotify_mask & event) { - dget(parent); - spin_unlock(&dentry->d_lock); - __inode_dir_notify(parent->d_inode, event); - dput(parent); - } else { - spin_unlock(&dentry->d_lock); + error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + if (error) { + /* if we added, we must shoot */ + if (dnentry == new_dnentry) + destroy = 1; + goto out; } + + error = attach_dn(dn, dnentry, id, fd, filp, mask); + /* !error means that we attached the dn to the dnentry, so don't free it */ + if (!error) + dn = NULL; + /* -EEXIST means that we didn't add this new dn and used an old one. + * that isn't an error (and the unused dn should be freed) */ + else if (error == -EEXIST) + error = 0; + + dnotify_recalc_inode_mask(entry); +out: + spin_unlock(&entry->lock); + + if (destroy) + fsnotify_destroy_mark_by_entry(entry); + + fsnotify_recalc_group_mask(dnotify_group); + + mutex_unlock(&dnotify_mark_mutex); + fsnotify_put_mark(entry); +out_err: + if (new_entry) + fsnotify_put_mark(new_entry); + if (dn) + kmem_cache_free(dnotify_struct_cache, dn); + return error; } -EXPORT_SYMBOL_GPL(dnotify_parent); static int __init dnotify_init(void) { - dn_cache = kmem_cache_create("dnotify_cache", - sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL); + dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); + dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC); + + dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM, + 0, &dnotify_fsnotify_ops); + if (IS_ERR(dnotify_group)) + panic("unable to allocate fsnotify group for dnotify\n"); return 0; } diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 102a902b439..ecc06286226 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -10,7 +10,7 @@ struct dnotify_struct { struct dnotify_struct * dn_next; - unsigned long dn_mask; + __u32 dn_mask; int dn_fd; struct file * dn_filp; fl_owner_t dn_owner; @@ -21,23 +21,18 @@ struct dnotify_struct { #ifdef CONFIG_DNOTIFY -extern void __inode_dir_notify(struct inode *, unsigned long); +#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\ + FS_MODIFY | FS_MODIFY_CHILD |\ + FS_ACCESS | FS_ACCESS_CHILD |\ + FS_ATTRIB | FS_ATTRIB_CHILD |\ + FS_CREATE | FS_DN_RENAME |\ + FS_MOVED_FROM | FS_MOVED_TO) + extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); -extern void dnotify_parent(struct dentry *, unsigned long); - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ - if (inode->i_dnotify_mask & (event)) - __inode_dir_notify(inode, event); -} #else -static inline void __inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - static inline void dnotify_flush(struct file *filp, fl_owner_t id) { } @@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) return -EINVAL; } -static inline void dnotify_parent(struct dentry *dentry, unsigned long event) -{ -} - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - #endif /* CONFIG_DNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 275b0860044..323b5ce474c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -760,11 +760,6 @@ struct inode { struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ #endif -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -#endif - #ifdef CONFIG_INOTIFY struct list_head inotify_watches; /* watches on this inode */ struct mutex inotify_mutex; /* protects the watches list */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6a662ed0bc8..db12d9de352 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -74,13 +74,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, __u32 new_dir_mask = 0; if (old_dir == new_dir) { - inode_dir_notify(old_dir, DN_RENAME); old_dir_mask = FS_DN_RENAME; - } else { - inode_dir_notify(old_dir, DN_DELETE); - old_dir_mask = FS_DELETE; - inode_dir_notify(new_dir, DN_CREATE); - new_dir_mask = FS_CREATE; } if (isdir) { @@ -132,7 +126,6 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) if (isdir) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_DELETE); fsnotify_parent(dentry, mask); } @@ -154,7 +147,6 @@ static inline void fsnotify_inoderemove(struct inode *inode) */ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { - inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); @@ -169,7 +161,6 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { - inode_dir_notify(dir, DN_CREATE); inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name, inode); fsnotify_link_count(inode); @@ -186,7 +177,6 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) __u32 mask = (FS_CREATE | FS_IN_ISDIR); struct inode *d_inode = dentry->d_inode; - inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); @@ -204,7 +194,6 @@ static inline void fsnotify_access(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_ACCESS); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); @@ -222,7 +211,6 @@ static inline void fsnotify_modify(struct dentry *dentry) if (S_ISDIR(inode->i_mode)) mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_MODIFY); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); @@ -289,47 +277,33 @@ static inline void fsnotify_xattr(struct dentry *dentry) static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; - int dn_mask = 0; - __u32 in_mask = 0; + __u32 mask = 0; + + if (ia_valid & ATTR_UID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_GID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_SIZE) + mask |= FS_MODIFY; - if (ia_valid & ATTR_UID) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_GID) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_SIZE) { - in_mask |= FS_MODIFY; - dn_mask |= DN_MODIFY; - } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) - { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } else if (ia_valid & ATTR_ATIME) { - in_mask |= FS_ACCESS; - dn_mask |= DN_ACCESS; - } else if (ia_valid & ATTR_MTIME) { - in_mask |= FS_MODIFY; - dn_mask |= DN_MODIFY; - } - if (ia_valid & ATTR_MODE) { - in_mask |= FS_ATTRIB; - dn_mask |= DN_ATTRIB; - } + mask |= FS_ATTRIB; + else if (ia_valid & ATTR_ATIME) + mask |= FS_ACCESS; + else if (ia_valid & ATTR_MTIME) + mask |= FS_MODIFY; + + if (ia_valid & ATTR_MODE) + mask |= FS_ATTRIB; - if (dn_mask) - dnotify_parent(dentry, dn_mask); - if (in_mask) { + if (mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= FS_IN_ISDIR; - inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); + mask |= FS_IN_ISDIR; + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); - fsnotify_parent(dentry, in_mask); - fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 13d2dd57004..9ea800e840f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -57,6 +57,9 @@ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ FS_DELETE) +/* listeners that hard code group numbers near the top */ +#define DNOTIFY_GROUP_NUM UINT_MAX + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; -- cgit v1.2.3-70-g09d2 From a2d8bc6cb4a3024661baf877242f123787d0c054 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:37 -0400 Subject: fsnotify: generic notification queue and waitq inotify needs to do asyc notification in which event information is stored on a queue until the listener is ready to receive it. This patch implements a generic notification queue for inotify (and later fanotify) to store events to be sent at a later time. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/fsnotify.h | 9 ++ fs/notify/group.c | 9 ++ fs/notify/notification.c | 230 +++++++++++++++++++++++++++++++++++++-- include/linux/fsnotify_backend.h | 37 +++++++ 4 files changed, 278 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 83b8ec0a8ec..4dc240824b2 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -13,8 +13,12 @@ extern struct list_head fsnotify_groups; /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ extern __u32 fsnotify_mask; +/* destroy all events sitting in this groups notification queue */ +extern void fsnotify_flush_notify(struct fsnotify_group *group); + /* final kfree of a group */ extern void fsnotify_final_destroy_group(struct fsnotify_group *group); + /* run the list of all marks associated with inode and flag them to be freed */ extern void fsnotify_clear_marks_by_inode(struct inode *inode); /* @@ -22,4 +26,9 @@ extern void fsnotify_clear_marks_by_inode(struct inode *inode); * about events that happen to its children. */ extern void __fsnotify_update_child_dentry_flags(struct inode *inode); + +/* allocate and destroy and event holder to attach events to notification/access queues */ +extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void); +extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder); + #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c index a29d2fa6792..0e1677144bc 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -91,6 +91,9 @@ static void fsnotify_get_group(struct fsnotify_group *group) */ void fsnotify_final_destroy_group(struct fsnotify_group *group) { + /* clear the notification queue of all events */ + fsnotify_flush_notify(group); + if (group->ops->free_group_priv) group->ops->free_group_priv(group); @@ -214,6 +217,12 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, group->group_num = group_num; group->mask = mask; + mutex_init(&group->notification_mutex); + INIT_LIST_HEAD(&group->notification_list); + init_waitqueue_head(&group->notification_waitq); + group->q_len = 0; + group->max_events = UINT_MAX; + spin_lock_init(&group->mark_lock); atomic_set(&group->num_marks, 0); INIT_LIST_HEAD(&group->mark_entries); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index b8e9a87f8f5..dddecc74e63 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -16,6 +16,21 @@ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* + * Basic idea behind the notification queue: An fsnotify group (like inotify) + * sends the userspace notification about events asyncronously some time after + * the event happened. When inotify gets an event it will need to add that + * event to the group notify queue. Since a single event might need to be on + * multiple group's notification queues we can't add the event directly to each + * queue and instead add a small "event_holder" to each queue. This event_holder + * has a pointer back to the original event. Since the majority of events are + * going to end up on one, and only one, notification queue we embed one + * event_holder into each event. This means we have a single allocation instead + * of always needing two. If the embedded event_holder is already in use by + * another group a new event_holder (from fsnotify_event_holder_cachep) will be + * allocated and used. + */ + #include #include #include @@ -33,6 +48,21 @@ #include "fsnotify.h" static struct kmem_cache *fsnotify_event_cachep; +static struct kmem_cache *fsnotify_event_holder_cachep; +/* + * This is a magic event we send when the q is too full. Since it doesn't + * hold real event information we just keep one system wide and use it any time + * it is needed. It's refcnt is set 1 at kernel init time and will never + * get set to 0 so it will never get 'freed' + */ +static struct fsnotify_event q_overflow_event; + +/* return true if the notify queue is empty, false otherwise */ +bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) +{ + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + return list_empty(&group->notification_list) ? true : false; +} void fsnotify_get_event(struct fsnotify_event *event) { @@ -52,19 +82,176 @@ void fsnotify_put_event(struct fsnotify_event *event) } } +struct fsnotify_event_holder *fsnotify_alloc_event_holder(void) +{ + return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL); +} + +void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) +{ + kmem_cache_free(fsnotify_event_holder_cachep, holder); +} + +/* + * check if 2 events contain the same information. + */ +static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) +{ + if ((old->mask == new->mask) && + (old->to_tell == new->to_tell) && + (old->data_type == new->data_type)) { + switch (old->data_type) { + case (FSNOTIFY_EVENT_INODE): + if (old->inode == new->inode) + return true; + break; + case (FSNOTIFY_EVENT_PATH): + if ((old->path.mnt == new->path.mnt) && + (old->path.dentry == new->path.dentry)) + return true; + case (FSNOTIFY_EVENT_NONE): + return true; + }; + } + return false; +} + /* - * Allocate a new event which will be sent to each group's handle_event function - * if the group was interested in this particular event. + * Add an event to the group notification queue. The group can later pull this + * event off the queue to deal with. If the event is successfully added to the + * group's notification queue, a reference is taken on event. */ -struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_type) +int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event) +{ + struct fsnotify_event_holder *holder = NULL; + struct list_head *list = &group->notification_list; + struct fsnotify_event_holder *last_holder; + struct fsnotify_event *last_event; + + /* + * There is one fsnotify_event_holder embedded inside each fsnotify_event. + * Check if we expect to be able to use that holder. If not alloc a new + * holder. + * For the overflow event it's possible that something will use the in + * event holder before we get the lock so we may need to jump back and + * alloc a new holder, this can't happen for most events... + */ + if (!list_empty(&event->holder.event_list)) { +alloc_holder: + holder = fsnotify_alloc_event_holder(); + if (!holder) + return -ENOMEM; + } + + mutex_lock(&group->notification_mutex); + + if (group->q_len >= group->max_events) + event = &q_overflow_event; + + spin_lock(&event->lock); + + if (list_empty(&event->holder.event_list)) { + if (unlikely(holder)) + fsnotify_destroy_event_holder(holder); + holder = &event->holder; + } else if (unlikely(!holder)) { + /* between the time we checked above and got the lock the in + * event holder was used, go back and get a new one */ + spin_unlock(&event->lock); + mutex_unlock(&group->notification_mutex); + goto alloc_holder; + } + + if (!list_empty(list)) { + last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); + last_event = last_holder->event; + if (event_compare(last_event, event)) { + spin_unlock(&event->lock); + mutex_unlock(&group->notification_mutex); + if (holder != &event->holder) + fsnotify_destroy_event_holder(holder); + return 0; + } + } + + group->q_len++; + holder->event = event; + + fsnotify_get_event(event); + list_add_tail(&holder->event_list, list); + spin_unlock(&event->lock); + mutex_unlock(&group->notification_mutex); + + wake_up(&group->notification_waitq); + return 0; +} + +/* + * Remove and return the first event from the notification list. There is a + * reference held on this event since it was on the list. It is the responsibility + * of the caller to drop this reference. + */ +struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) { struct fsnotify_event *event; + struct fsnotify_event_holder *holder; - event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); - if (!event) - return NULL; + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); + + event = holder->event; + + spin_lock(&event->lock); + holder->event = NULL; + list_del_init(&holder->event_list); + spin_unlock(&event->lock); + + /* event == holder means we are referenced through the in event holder */ + if (holder != &event->holder) + fsnotify_destroy_event_holder(holder); + + group->q_len--; + + return event; +} + +/* + * This will not remove the event, that must be done with fsnotify_remove_notify_event() + */ +struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) +{ + struct fsnotify_event *event; + struct fsnotify_event_holder *holder; + + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + + holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); + event = holder->event; + + return event; +} + +/* + * Called when a group is being torn down to clean up any outstanding + * event notifications. + */ +void fsnotify_flush_notify(struct fsnotify_group *group) +{ + struct fsnotify_event *event; + + mutex_lock(&group->notification_mutex); + while (!fsnotify_notify_queue_is_empty(group)) { + event = fsnotify_remove_notify_event(group); + fsnotify_put_event(event); /* matches fsnotify_add_notify_event */ + } + mutex_unlock(&group->notification_mutex); +} + +static void initialize_event(struct fsnotify_event *event) +{ + event->holder.event = NULL; + INIT_LIST_HEAD(&event->holder.event_list); atomic_set(&event->refcnt, 1); spin_lock_init(&event->lock); @@ -72,7 +259,32 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, event->path.dentry = NULL; event->path.mnt = NULL; event->inode = NULL; + event->data_type = FSNOTIFY_EVENT_NONE; + event->to_tell = NULL; +} + +/* + * fsnotify_create_event - Allocate a new event which will be sent to each + * group's handle_event function if the group was interested in this + * particular event. + * + * @to_tell the inode which is supposed to receive the event (sometimes a + * parent of the inode to which the event happened. + * @mask what actually happened. + * @data pointer to the object which was actually affected + * @data_type flag indication if the data is a file, path, inode, nothing... + */ +struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_type) +{ + struct fsnotify_event *event; + + event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); + if (!event) + return NULL; + + initialize_event(event); event->to_tell = to_tell; switch (data_type) { @@ -114,6 +326,10 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, __init int fsnotify_notification_init(void) { fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); + fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); + + initialize_event(&q_overflow_event); + q_overflow_event.mask = FS_Q_OVERFLOW; return 0; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9ea800e840f..15f8f82a5c5 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -119,6 +119,13 @@ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ + /* needed to send notification to userspace */ + struct mutex notification_mutex; /* protect the notification_list */ + struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ + wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ + unsigned int q_len; /* events on the queue */ + unsigned int max_events; /* maximum events allowed on the list */ + /* stores all fastapth entries assoc with this group so they can be cleaned on unregister */ spinlock_t mark_lock; /* protect mark_entries list */ atomic_t num_marks; /* 1 for each mark entry and 1 for not being @@ -135,12 +142,33 @@ struct fsnotify_group { }; }; +/* + * A single event can be queued in multiple group->notification_lists. + * + * each group->notification_list will point to an event_holder which in turns points + * to the actual event that needs to be sent to userspace. + * + * Seemed cheaper to create a refcnt'd event and a small holder for every group + * than create a different event for every group + * + */ +struct fsnotify_event_holder { + struct fsnotify_event *event; + struct list_head event_list; +}; + /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the * listener this structure is where you need to be adding fields. */ struct fsnotify_event { + /* + * If we create an event we are also likely going to need a holder + * to link to a group. So embed one holder in the event. Means only + * one allocation for the common case where we only have one group + */ + struct fsnotify_event_holder holder; spinlock_t lock; /* protection for the associated event_holder and private_list */ /* to_tell may ONLY be dereferenced during handle_event(). */ struct inode *to_tell; /* either the inode the event happened to or its parent */ @@ -264,6 +292,15 @@ extern void fsnotify_put_event(struct fsnotify_event *event); extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* attach the event to the group notification queue */ +extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event); +/* true if the group notification queue is empty */ +extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); +/* return, but do not dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); +/* reutnr AND dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); + /* functions used to manipulate the marks attached to inodes */ /* run all marks associated with an inode and update inode->i_fsnotify_mask */ -- cgit v1.2.3-70-g09d2 From 62ffe5dfba056f7ba81d710fee9f28c58a42fdd6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:43 -0400 Subject: fsnotify: include pathnames with entries when possible When inotify wants to send events to a directory about a child it includes the name of the original file. This patch collects that filename and makes it available for notification. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/fsnotify.c | 7 ++++--- fs/notify/notification.c | 16 +++++++++++++++- include/linux/fsnotify.h | 28 ++++++++++++++-------------- include/linux/fsnotify_backend.h | 11 ++++++++--- 4 files changed, 41 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 7fc760067a6..675129fa9fd 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -114,7 +114,8 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask) * specifies these are events which came from a child. */ mask |= FS_EVENT_ON_CHILD; - fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, + dentry->d_name.name); dput(parent); } @@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent); * out to all of the registered fsnotify_group. Those groups can then use the * notification event in whatever means they feel necessary. */ -void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name) { struct fsnotify_group *group; struct fsnotify_event *event = NULL; @@ -156,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) if (!group->ops->should_send_event(group, to_tell, mask)) continue; if (!event) { - event = fsnotify_create_event(to_tell, mask, data, data_is); + event = fsnotify_create_event(to_tell, mask, data, data_is, file_name); /* shit, we OOM'd and now we can't tell, maybe * someday someone else will want to do something * here */ diff --git a/fs/notify/notification.c b/fs/notify/notification.c index dddecc74e63..c69b18b9aba 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -78,6 +78,7 @@ void fsnotify_put_event(struct fsnotify_event *event) if (event->data_type == FSNOTIFY_EVENT_PATH) path_put(&event->path); + kfree(event->file_name); kmem_cache_free(fsnotify_event_cachep, event); } } @@ -262,6 +263,9 @@ static void initialize_event(struct fsnotify_event *event) event->data_type = FSNOTIFY_EVENT_NONE; event->to_tell = NULL; + + event->file_name = NULL; + event->name_len = 0; } /* @@ -274,9 +278,10 @@ static void initialize_event(struct fsnotify_event *event) * @mask what actually happened. * @data pointer to the object which was actually affected * @data_type flag indication if the data is a file, path, inode, nothing... + * @name the filename, if available */ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_type) + void *data, int data_type, const char *name) { struct fsnotify_event *event; @@ -285,6 +290,15 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, return NULL; initialize_event(event); + + if (name) { + event->file_name = kstrdup(name, GFP_KERNEL); + if (!event->file_name) { + kmem_cache_free(fsnotify_event_cachep, event); + return NULL; + } + event->name_len = strlen(event->file_name); + } event->to_tell = to_tell; switch (data_type) { diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index db12d9de352..180740e9ec8 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode) { inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); - fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -91,8 +91,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, source); - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name); if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); @@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); - fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL); } audit_inode_child(new_name, moved, new_dir); } @@ -138,7 +138,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); - fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL); __fsnotify_inode_delete(inode); } @@ -151,7 +151,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); } /* @@ -166,7 +166,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE); + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name); } /* @@ -180,7 +180,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); } /* @@ -197,7 +197,7 @@ static inline void fsnotify_access(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -214,7 +214,7 @@ static inline void fsnotify_modify(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -231,7 +231,7 @@ static inline void fsnotify_open(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -250,7 +250,7 @@ static inline void fsnotify_close(struct file *file) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL); } /* @@ -267,7 +267,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } /* @@ -303,7 +303,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 15f8f82a5c5..52692f40589 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -192,6 +192,9 @@ struct fsnotify_event { int data_type; /* which of the above union we have */ atomic_t refcnt; /* how many groups still are using/need to send this event */ __u32 mask; /* the type of access, bitwise OR for FS_* event types */ + + char *file_name; + size_t name_len; }; /* @@ -224,7 +227,7 @@ struct fsnotify_mark_entry { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name); extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); @@ -319,10 +322,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_is); + void *data, int data_is, const char *name); + #else -static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name); {} static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) -- cgit v1.2.3-70-g09d2 From 47882c6f51e8ef41fbbe2bbb746a1ea3228dd7ca Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:47 -0400 Subject: fsnotify: add correlations between events As part of the standard inotify events it includes a correlation cookie between two dentry move operations. This patch includes the same behaviour in fsnotify events. It is needed so that inotify userspace can be implemented on top of fsnotify. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/fsnotify.c | 6 +++--- fs/notify/notification.c | 20 ++++++++++++++++++-- include/linux/fsnotify.h | 35 ++++++++++++++++++----------------- include/linux/fsnotify_backend.h | 15 ++++++++++++--- 4 files changed, 51 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 675129fa9fd..f11d75f0236 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -115,7 +115,7 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask) mask |= FS_EVENT_ON_CHILD; fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name); + dentry->d_name.name, 0); dput(parent); } @@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent); * out to all of the registered fsnotify_group. Those groups can then use the * notification event in whatever means they feel necessary. */ -void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name) +void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie) { struct fsnotify_group *group; struct fsnotify_event *event = NULL; @@ -157,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const if (!group->ops->should_send_event(group, to_tell, mask)) continue; if (!event) { - event = fsnotify_create_event(to_tell, mask, data, data_is, file_name); + event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie); /* shit, we OOM'd and now we can't tell, maybe * someday someone else will want to do something * here */ diff --git a/fs/notify/notification.c b/fs/notify/notification.c index c69b18b9aba..346f6e5c355 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,17 @@ static struct kmem_cache *fsnotify_event_holder_cachep; * get set to 0 so it will never get 'freed' */ static struct fsnotify_event q_overflow_event; +static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); + +/** + * fsnotify_get_cookie - return a unique cookie for use in synchronizing events. + * Called from fsnotify_move, which is inlined into filesystem modules. + */ +u32 fsnotify_get_cookie(void) +{ + return atomic_inc_return(&fsnotify_sync_cookie); +} +EXPORT_SYMBOL_GPL(fsnotify_get_cookie); /* return true if the notify queue is empty, false otherwise */ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) @@ -266,6 +278,8 @@ static void initialize_event(struct fsnotify_event *event) event->file_name = NULL; event->name_len = 0; + + event->sync_cookie = 0; } /* @@ -280,8 +294,8 @@ static void initialize_event(struct fsnotify_event *event) * @data_type flag indication if the data is a file, path, inode, nothing... * @name the filename, if available */ -struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_type, const char *name) +struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, + int data_type, const char *name, u32 cookie) { struct fsnotify_event *event; @@ -299,6 +313,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, } event->name_len = strlen(event->file_name); } + + event->sync_cookie = cookie; event->to_tell = to_tell; switch (data_type) { diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 180740e9ec8..c25b39ddd62 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode) { inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); - fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -69,7 +69,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; - u32 cookie = inotify_get_cookie(); + u32 in_cookie = inotify_get_cookie(); + u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = 0; __u32 new_dir_mask = 0; @@ -86,13 +87,13 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, old_dir_mask |= FS_MOVED_FROM; new_dir_mask |= FS_MOVED_TO; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name, source); - inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, + inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name, source); - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); @@ -104,7 +105,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); - fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0); } audit_inode_child(new_name, moved, new_dir); } @@ -138,7 +139,7 @@ static inline void fsnotify_inoderemove(struct inode *inode) inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); - fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0); __fsnotify_inode_delete(inode); } @@ -151,7 +152,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -166,7 +167,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name); + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0); } /* @@ -180,7 +181,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name); + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -197,7 +198,7 @@ static inline void fsnotify_access(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -214,7 +215,7 @@ static inline void fsnotify_modify(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -231,7 +232,7 @@ static inline void fsnotify_open(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -250,7 +251,7 @@ static inline void fsnotify_close(struct file *file) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL); + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0); } /* @@ -267,7 +268,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -303,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) inotify_inode_queue_event(inode, mask, 0, NULL, NULL); fsnotify_parent(dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 52692f40589..b78b5573d22 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -193,6 +193,7 @@ struct fsnotify_event { atomic_t refcnt; /* how many groups still are using/need to send this event */ __u32 mask; /* the type of access, bitwise OR for FS_* event types */ + u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ char *file_name; size_t name_len; }; @@ -227,9 +228,11 @@ struct fsnotify_mark_entry { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name); +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name, u32 cookie); extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); +extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) { @@ -322,12 +325,13 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, - void *data, int data_is, const char *name); + void *data, int data_is, const char *name, + u32 cookie); #else static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, - const char *name); + const char *name, u32 cookie) {} static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) @@ -342,6 +346,11 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) {} +static inline u32 fsnotify_get_cookie(void) +{ + return 0; +} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3-70-g09d2 From e4aff117368cfdd3567ee41844d216d079b55173 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:50 -0400 Subject: fsnotify: allow groups to add private data to events inotify needs per group information attached to events. This patch allows groups to attach private information and implements a callback so that information can be freed when an event is being destroyed. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/notify/dnotify/dnotify.c | 1 + fs/notify/notification.c | 52 ++++++++++++++++++++++++++++++++++++---- include/linux/fsnotify_backend.h | 24 +++++++++++++++---- 3 files changed, 68 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index d9d80f502c6..12f9e6b1ffe 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -183,6 +183,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = { .should_send_event = dnotify_should_send_event, .free_group_priv = NULL, .freeing_mark = dnotify_freeing_mark, + .free_event_priv = NULL, }; /* diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 346f6e5c355..959b73e756f 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -90,6 +90,8 @@ void fsnotify_put_event(struct fsnotify_event *event) if (event->data_type == FSNOTIFY_EVENT_PATH) path_put(&event->path); + BUG_ON(!list_empty(&event->private_data_list)); + kfree(event->file_name); kmem_cache_free(fsnotify_event_cachep, event); } @@ -106,7 +108,29 @@ void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) } /* - * check if 2 events contain the same information. + * Find the private data that the group previously attached to this event when + * the group added the event to the notification queue (fsnotify_add_notify_event) + */ +struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event) +{ + struct fsnotify_event_private_data *lpriv; + struct fsnotify_event_private_data *priv = NULL; + + assert_spin_locked(&event->lock); + + list_for_each_entry(lpriv, &event->private_data_list, event_list) { + if (lpriv->group == group) { + priv = lpriv; + list_del(&priv->event_list); + break; + } + } + return priv; +} + +/* + * Check if 2 events contain the same information. We do not compare private data + * but at this moment that isn't a problem for any know fsnotify listeners. */ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) { @@ -134,13 +158,17 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new * event off the queue to deal with. If the event is successfully added to the * group's notification queue, a reference is taken on event. */ -int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event) +int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, + struct fsnotify_event_private_data *priv) { struct fsnotify_event_holder *holder = NULL; struct list_head *list = &group->notification_list; struct fsnotify_event_holder *last_holder; struct fsnotify_event *last_event; + /* easy to tell if priv was attached to the event */ + INIT_LIST_HEAD(&priv->event_list); + /* * There is one fsnotify_event_holder embedded inside each fsnotify_event. * Check if we expect to be able to use that holder. If not alloc a new @@ -158,8 +186,11 @@ alloc_holder: mutex_lock(&group->notification_mutex); - if (group->q_len >= group->max_events) + if (group->q_len >= group->max_events) { event = &q_overflow_event; + /* sorry, no private data on the overflow event */ + priv = NULL; + } spin_lock(&event->lock); @@ -183,7 +214,7 @@ alloc_holder: mutex_unlock(&group->notification_mutex); if (holder != &event->holder) fsnotify_destroy_event_holder(holder); - return 0; + return -EEXIST; } } @@ -192,6 +223,8 @@ alloc_holder: fsnotify_get_event(event); list_add_tail(&holder->event_list, list); + if (priv) + list_add_tail(&priv->event_list, &event->private_data_list); spin_unlock(&event->lock); mutex_unlock(&group->notification_mutex); @@ -252,10 +285,19 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) void fsnotify_flush_notify(struct fsnotify_group *group) { struct fsnotify_event *event; + struct fsnotify_event_private_data *priv; mutex_lock(&group->notification_mutex); while (!fsnotify_notify_queue_is_empty(group)) { event = fsnotify_remove_notify_event(group); + /* if they don't implement free_event_priv they better not have attached any */ + if (group->ops->free_event_priv) { + spin_lock(&event->lock); + priv = fsnotify_remove_priv_from_event(group, event); + spin_unlock(&event->lock); + if (priv) + group->ops->free_event_priv(priv); + } fsnotify_put_event(event); /* matches fsnotify_add_notify_event */ } mutex_unlock(&group->notification_mutex); @@ -274,6 +316,8 @@ static void initialize_event(struct fsnotify_event *event) event->inode = NULL; event->data_type = FSNOTIFY_EVENT_NONE; + INIT_LIST_HEAD(&event->private_data_list); + event->to_tell = NULL; event->file_name = NULL; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b78b5573d22..efdf9e442d8 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -63,6 +63,7 @@ struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark_entry; +struct fsnotify_event_private_data; /* * Each group much define these ops. The fsnotify infrastructure will call @@ -81,6 +82,7 @@ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); + void (*free_event_priv)(struct fsnotify_event_private_data *priv); }; /* @@ -157,6 +159,15 @@ struct fsnotify_event_holder { struct list_head event_list; }; +/* + * Inotify needs to tack data onto an event. This struct lets us later find the + * correct private data of the correct group. + */ +struct fsnotify_event_private_data { + struct fsnotify_group *group; + struct list_head event_list; +}; + /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the @@ -196,6 +207,8 @@ struct fsnotify_event { u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ char *file_name; size_t name_len; + + struct list_head private_data_list; /* groups can store private data here */ }; /* @@ -294,17 +307,18 @@ extern void fsnotify_put_group(struct fsnotify_group *group); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -/* find private data previously attached to an event */ -extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, - struct fsnotify_event *event); +/* find private data previously attached to an event and unlink it */ +extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event); +extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, + struct fsnotify_event_private_data *priv); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); -/* reutnr AND dequeue the first event on the notification queue */ +/* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); /* functions used to manipulate the marks attached to inodes */ -- cgit v1.2.3-70-g09d2 From 164bc6195139047faaf5ada1278332e99494803b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:01:58 -0400 Subject: fsnotify: handle filesystem unmounts with fsnotify marks When an fs is unmounted with an fsnotify mark entry attached to one of its inodes we need to destroy that mark entry and we also (like inotify) send an unmount event. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- fs/inode.c | 1 + fs/notify/inode_mark.c | 72 ++++++++++++++++++++++++++++++++++++++++ include/linux/fsnotify_backend.h | 4 +++ 3 files changed, 77 insertions(+) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 54c63ce3de2..ca337014ae2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -407,6 +407,7 @@ int invalidate_inodes(struct super_block *sb) mutex_lock(&iprune_mutex); spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); + fsnotify_unmount_inodes(&sb->s_inodes); busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 282150f74cf..0a499d2c619 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -89,6 +89,7 @@ #include #include #include +#include /* for inode_lock */ #include @@ -351,3 +352,74 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry, return ret; } + +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @list: list of inodes being unmounted (sb->s_inodes) + * + * Called with inode_lock held, protecting the unmounting super block's list + * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. + * We temporarily drop inode_lock, however, and CAN block. + */ +void fsnotify_unmount_inodes(struct list_head *list) +{ + struct inode *inode, *next_i, *need_iput = NULL; + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inode *need_iput_tmp; + + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + continue; + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) + continue; + + need_iput_tmp = need_iput; + need_iput = NULL; + + /* In case fsnotify_inode_delete() drops a reference. */ + if (inode != need_iput_tmp) + __iget(inode); + else + need_iput_tmp = NULL; + + /* In case the dropping of a reference would nuke next_i. */ + if ((&next_i->i_sb_list != list) && + atomic_read(&next_i->i_count) && + !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + + /* + * We can safely drop inode_lock here because we hold + * references on both inode and next_i. Also no new inodes + * will be added since the umount has begun. Finally, + * iprune_mutex keeps shrink_icache_memory() away. + */ + spin_unlock(&inode_lock); + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput(inode); + + spin_lock(&inode_lock); + } +} diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index efdf9e442d8..d2c0ee30e61 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -336,6 +336,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -365,6 +366,9 @@ static inline u32 fsnotify_get_cookie(void) return 0; } +static inline void fsnotify_unmount_inodes(struct list_head *list) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ -- cgit v1.2.3-70-g09d2 From 63c882a05416e18de6fb59f7dd6da48f3bbe8273 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 21 May 2009 17:02:01 -0400 Subject: inotify: reimplement inotify using fsnotify Reimplement inotify_user using fsnotify. This should be feature for feature exactly the same as the original inotify_user. This does not make any changes to the in kernel inotify feature used by audit. Those patches (and the eventual removal of in kernel inotify) will come after the new inotify_user proves to be working correctly. Signed-off-by: Eric Paris Acked-by: Al Viro Cc: Christoph Hellwig --- MAINTAINERS | 2 + fs/notify/inotify/Kconfig | 20 +- fs/notify/inotify/Makefile | 2 +- fs/notify/inotify/inotify.h | 21 + fs/notify/inotify/inotify_fsnotify.c | 137 ++++++ fs/notify/inotify/inotify_user.c | 837 +++++++++++++++++------------------ include/linux/fsnotify_backend.h | 11 + init/Kconfig | 3 +- 8 files changed, 585 insertions(+), 448 deletions(-) create mode 100644 fs/notify/inotify/inotify.h create mode 100644 fs/notify/inotify/inotify_fsnotify.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 96e0c8c6079..e697b67031a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2858,6 +2858,8 @@ P: John McCutchan M: john@johnmccutchan.com P: Robert Love M: rlove@rlove.org +P: Eric Paris +M: eparis@parisplace.org L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/filesystems/inotify.txt diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 44679284102..5356884289a 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig @@ -1,26 +1,30 @@ config INOTIFY bool "Inotify file change notification support" - default y + default n ---help--- - Say Y here to enable inotify support. Inotify is a file change - notification system and a replacement for dnotify. Inotify fixes - numerous shortcomings in dnotify and introduces several new features - including multiple file events, one-shot support, and unmount - notification. + Say Y here to enable legacy in kernel inotify support. Inotify is a + file change notification system. It is a replacement for dnotify. + This option only provides the legacy inotify in kernel API. There + are no in tree kernel users of this interface since it is deprecated. + You only need this if you are loading an out of tree kernel module + that uses inotify. For more information, see - If unsure, say Y. + If unsure, say N. config INOTIFY_USER bool "Inotify support for userspace" - depends on INOTIFY + depends on FSNOTIFY default y ---help--- Say Y here to enable inotify support for userspace, including the associated system calls. Inotify allows monitoring of both files and directories via a single open fd. Events are read from the file descriptor, which is also select()- and poll()-able. + Inotify fixes numerous shortcomings in dnotify and introduces several + new features including multiple file events, one-shot support, and + unmount notification. For more information, see diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile index e290f3bb9d8..94382817136 100644 --- a/fs/notify/inotify/Makefile +++ b/fs/notify/inotify/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INOTIFY) += inotify.o -obj-$(CONFIG_INOTIFY_USER) += inotify_user.o +obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h new file mode 100644 index 00000000000..ea2605a58b8 --- /dev/null +++ b/fs/notify/inotify/inotify.h @@ -0,0 +1,21 @@ +#include +#include +#include /* struct kmem_cache */ + +extern struct kmem_cache *event_priv_cachep; + +struct inotify_event_private_data { + struct fsnotify_event_private_data fsnotify_event_priv_data; + int wd; +}; + +struct inotify_inode_mark_entry { + /* fsnotify_mark_entry MUST be the first thing */ + struct fsnotify_mark_entry fsn_entry; + int wd; +}; + +extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); +extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); + +extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c new file mode 100644 index 00000000000..160da548683 --- /dev/null +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -0,0 +1,137 @@ +/* + * fs/inotify_user.c - inotify support for userspace + * + * Authors: + * John McCutchan + * Robert Love + * + * Copyright (C) 2005 John McCutchan + * Copyright 2006 Hewlett-Packard Development Company, L.P. + * + * Copyright (C) 2009 Eric Paris + * inotify was largely rewriten to make use of the fsnotify infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include /* struct inode */ +#include +#include +#include /* struct path */ +#include /* kmem_* */ +#include + +#include "inotify.h" + +static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) +{ + struct fsnotify_mark_entry *entry; + struct inotify_inode_mark_entry *ientry; + struct inode *to_tell; + struct inotify_event_private_data *event_priv; + struct fsnotify_event_private_data *fsn_event_priv; + int wd, ret; + + to_tell = event->to_tell; + + spin_lock(&to_tell->i_lock); + entry = fsnotify_find_mark_entry(group, to_tell); + spin_unlock(&to_tell->i_lock); + /* race with watch removal? We already passes should_send */ + if (unlikely(!entry)) + return 0; + ientry = container_of(entry, struct inotify_inode_mark_entry, + fsn_entry); + wd = ientry->wd; + + event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); + if (unlikely(!event_priv)) + return -ENOMEM; + + fsn_event_priv = &event_priv->fsnotify_event_priv_data; + + fsn_event_priv->group = group; + event_priv->wd = wd; + + ret = fsnotify_add_notify_event(group, event, fsn_event_priv); + /* EEXIST is not an error */ + if (ret == -EEXIST) + ret = 0; + + /* did event_priv get attached? */ + if (list_empty(&fsn_event_priv->event_list)) + inotify_free_event_priv(fsn_event_priv); + + /* + * If we hold the entry until after the event is on the queue + * IN_IGNORED won't be able to pass this event in the queue + */ + fsnotify_put_mark(entry); + + return ret; +} + +static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) +{ + inotify_destroy_mark_entry(entry, group); +} + +static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) +{ + struct fsnotify_mark_entry *entry; + bool send; + + spin_lock(&inode->i_lock); + entry = fsnotify_find_mark_entry(group, inode); + spin_unlock(&inode->i_lock); + if (!entry) + return false; + + send = (entry->mask & mask); + + /* find took a reference */ + fsnotify_put_mark(entry); + + return send; +} + +static int idr_callback(int id, void *p, void *data) +{ + BUG(); + return 0; +} + +static void inotify_free_group_priv(struct fsnotify_group *group) +{ + /* ideally the idr is empty and we won't hit the BUG in teh callback */ + idr_for_each(&group->inotify_data.idr, idr_callback, NULL); + idr_remove_all(&group->inotify_data.idr); + idr_destroy(&group->inotify_data.idr); +} + +void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) +{ + struct inotify_event_private_data *event_priv; + + + event_priv = container_of(fsn_event_priv, struct inotify_event_private_data, + fsnotify_event_priv_data); + + kmem_cache_free(event_priv_cachep, event_priv); +} + +const struct fsnotify_ops inotify_fsnotify_ops = { + .handle_event = inotify_handle_event, + .should_send_event = inotify_should_send_event, + .free_group_priv = inotify_free_group_priv, + .free_event_priv = inotify_free_event_priv, + .freeing_mark = inotify_freeing_mark, +}; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1634319e240..982a412ac5b 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -8,6 +8,9 @@ * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * + * Copyright (C) 2009 Eric Paris + * inotify was largely rewriten to make use of the fsnotify infrastructure + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2, or (at your option) any @@ -19,94 +22,48 @@ * General Public License for more details. */ -#include -#include -#include -#include #include -#include -#include -#include -#include -#include +#include /* struct inode */ +#include +#include +#include /* module_init */ #include +#include /* roundup() */ +#include /* superblock magic number */ +#include /* mntget */ +#include /* LOOKUP_FOLLOW */ +#include /* struct path */ +#include /* struct user */ +#include /* struct kmem_cache */ #include -#include +#include +#include +#include +#include -#include +#include "inotify.h" -static struct kmem_cache *watch_cachep __read_mostly; -static struct kmem_cache *event_cachep __read_mostly; +#include static struct vfsmount *inotify_mnt __read_mostly; +/* this just sits here and wastes global memory. used to just pad userspace messages with zeros */ +static struct inotify_event nul_inotify_event; + /* these are configurable via /proc/sys/fs/inotify/ */ static int inotify_max_user_instances __read_mostly; -static int inotify_max_user_watches __read_mostly; static int inotify_max_queued_events __read_mostly; +int inotify_max_user_watches __read_mostly; -/* - * Lock ordering: - * - * inotify_dev->up_mutex (ensures we don't re-add the same watch) - * inode->inotify_mutex (protects inode's watch list) - * inotify_handle->mutex (protects inotify_handle's watch list) - * inotify_dev->ev_mutex (protects device's event queue) - */ +static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *event_priv_cachep __read_mostly; +static struct fsnotify_event *inotify_ignored_event; /* - * Lifetimes of the main data structures: - * - * inotify_device: Lifetime is managed by reference count, from - * sys_inotify_init() until release. Additional references can bump the count - * via get_inotify_dev() and drop the count via put_inotify_dev(). - * - * inotify_user_watch: Lifetime is from create_watch() to the receipt of an - * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the - * first event, or to inotify_destroy(). + * When inotify registers a new group it increments this and uses that + * value as an offset to set the fsnotify group "name" and priority. */ - -/* - * struct inotify_device - represents an inotify instance - * - * This structure is protected by the mutex 'mutex'. - */ -struct inotify_device { - wait_queue_head_t wq; /* wait queue for i/o */ - struct mutex ev_mutex; /* protects event queue */ - struct mutex up_mutex; /* synchronizes watch updates */ - struct list_head events; /* list of queued events */ - struct user_struct *user; /* user who opened this dev */ - struct inotify_handle *ih; /* inotify handle */ - struct fasync_struct *fa; /* async notification */ - atomic_t count; /* reference count */ - unsigned int queue_size; /* size of the queue (bytes) */ - unsigned int event_count; /* number of pending events */ - unsigned int max_events; /* maximum number of events */ -}; - -/* - * struct inotify_kernel_event - An inotify event, originating from a watch and - * queued for user-space. A list of these is attached to each instance of the - * device. In read(), this list is walked and all events that can fit in the - * buffer are returned. - * - * Protected by dev->ev_mutex of the device in which we are queued. - */ -struct inotify_kernel_event { - struct inotify_event event; /* the user-space event */ - struct list_head list; /* entry in inotify_device's list */ - char *name; /* filename, if any */ -}; - -/* - * struct inotify_user_watch - our version of an inotify_watch, we add - * a reference to the associated inotify_device. - */ -struct inotify_user_watch { - struct inotify_device *dev; /* associated device */ - struct inotify_watch wdata; /* inotify watch data */ -}; +static atomic_t inotify_grp_num; #ifdef CONFIG_SYSCTL @@ -149,280 +106,36 @@ ctl_table inotify_table[] = { }; #endif /* CONFIG_SYSCTL */ -static inline void get_inotify_dev(struct inotify_device *dev) -{ - atomic_inc(&dev->count); -} - -static inline void put_inotify_dev(struct inotify_device *dev) -{ - if (atomic_dec_and_test(&dev->count)) { - atomic_dec(&dev->user->inotify_devs); - free_uid(dev->user); - kfree(dev); - } -} - -/* - * free_inotify_user_watch - cleans up the watch and its references - */ -static void free_inotify_user_watch(struct inotify_watch *w) -{ - struct inotify_user_watch *watch; - struct inotify_device *dev; - - watch = container_of(w, struct inotify_user_watch, wdata); - dev = watch->dev; - - atomic_dec(&dev->user->inotify_watches); - put_inotify_dev(dev); - kmem_cache_free(watch_cachep, watch); -} - -/* - * kernel_event - create a new kernel event with the given parameters - * - * This function can sleep. - */ -static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, - const char *name) -{ - struct inotify_kernel_event *kevent; - - kevent = kmem_cache_alloc(event_cachep, GFP_NOFS); - if (unlikely(!kevent)) - return NULL; - - /* we hand this out to user-space, so zero it just in case */ - memset(&kevent->event, 0, sizeof(struct inotify_event)); - - kevent->event.wd = wd; - kevent->event.mask = mask; - kevent->event.cookie = cookie; - - INIT_LIST_HEAD(&kevent->list); - - if (name) { - size_t len, rem, event_size = sizeof(struct inotify_event); - - /* - * We need to pad the filename so as to properly align an - * array of inotify_event structures. Because the structure is - * small and the common case is a small filename, we just round - * up to the next multiple of the structure's sizeof. This is - * simple and safe for all architectures. - */ - len = strlen(name) + 1; - rem = event_size - len; - if (len > event_size) { - rem = event_size - (len % event_size); - if (len % event_size == 0) - rem = 0; - } - - kevent->name = kmalloc(len + rem, GFP_NOFS); - if (unlikely(!kevent->name)) { - kmem_cache_free(event_cachep, kevent); - return NULL; - } - memcpy(kevent->name, name, len); - if (rem) - memset(kevent->name + len, 0, rem); - kevent->event.len = len + rem; - } else { - kevent->event.len = 0; - kevent->name = NULL; - } - - return kevent; -} - -/* - * inotify_dev_get_event - return the next event in the given dev's queue - * - * Caller must hold dev->ev_mutex. - */ -static inline struct inotify_kernel_event * -inotify_dev_get_event(struct inotify_device *dev) -{ - return list_entry(dev->events.next, struct inotify_kernel_event, list); -} - -/* - * inotify_dev_get_last_event - return the last event in the given dev's queue - * - * Caller must hold dev->ev_mutex. - */ -static inline struct inotify_kernel_event * -inotify_dev_get_last_event(struct inotify_device *dev) +static inline __u32 inotify_arg_to_mask(u32 arg) { - if (list_empty(&dev->events)) - return NULL; - return list_entry(dev->events.prev, struct inotify_kernel_event, list); -} + __u32 mask; -/* - * inotify_dev_queue_event - event handler registered with core inotify, adds - * a new event to the given device - * - * Can sleep (calls kernel_event()). - */ -static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, - u32 cookie, const char *name, - struct inode *ignored) -{ - struct inotify_user_watch *watch; - struct inotify_device *dev; - struct inotify_kernel_event *kevent, *last; + /* everything should accept their own ignored and cares about children */ + mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); - watch = container_of(w, struct inotify_user_watch, wdata); - dev = watch->dev; + /* mask off the flags used to open the fd */ + mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); - mutex_lock(&dev->ev_mutex); - - /* we can safely put the watch as we don't reference it while - * generating the event - */ - if (mask & IN_IGNORED || w->mask & IN_ONESHOT) - put_inotify_watch(w); /* final put */ - - /* coalescing: drop this event if it is a dupe of the previous */ - last = inotify_dev_get_last_event(dev); - if (last && last->event.mask == mask && last->event.wd == wd && - last->event.cookie == cookie) { - const char *lastname = last->name; - - if (!name && !lastname) - goto out; - if (name && lastname && !strcmp(lastname, name)) - goto out; - } - - /* the queue overflowed and we already sent the Q_OVERFLOW event */ - if (unlikely(dev->event_count > dev->max_events)) - goto out; - - /* if the queue overflows, we need to notify user space */ - if (unlikely(dev->event_count == dev->max_events)) - kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); - else - kevent = kernel_event(wd, mask, cookie, name); - - if (unlikely(!kevent)) - goto out; - - /* queue the event and wake up anyone waiting */ - dev->event_count++; - dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; - list_add_tail(&kevent->list, &dev->events); - wake_up_interruptible(&dev->wq); - kill_fasync(&dev->fa, SIGIO, POLL_IN); - -out: - mutex_unlock(&dev->ev_mutex); -} - -/* - * remove_kevent - cleans up the given kevent - * - * Caller must hold dev->ev_mutex. - */ -static void remove_kevent(struct inotify_device *dev, - struct inotify_kernel_event *kevent) -{ - list_del(&kevent->list); - - dev->event_count--; - dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; -} - -/* - * free_kevent - frees the given kevent. - */ -static void free_kevent(struct inotify_kernel_event *kevent) -{ - kfree(kevent->name); - kmem_cache_free(event_cachep, kevent); -} - -/* - * inotify_dev_event_dequeue - destroy an event on the given device - * - * Caller must hold dev->ev_mutex. - */ -static void inotify_dev_event_dequeue(struct inotify_device *dev) -{ - if (!list_empty(&dev->events)) { - struct inotify_kernel_event *kevent; - kevent = inotify_dev_get_event(dev); - remove_kevent(dev, kevent); - free_kevent(kevent); - } -} - -/* - * find_inode - resolve a user-given path to a specific inode - */ -static int find_inode(const char __user *dirname, struct path *path, - unsigned flags) -{ - int error; - - error = user_path_at(AT_FDCWD, dirname, flags, path); - if (error) - return error; - /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); - if (error) - path_put(path); - return error; + return mask; } -/* - * create_watch - creates a watch on the given device. - * - * Callers must hold dev->up_mutex. - */ -static int create_watch(struct inotify_device *dev, struct inode *inode, - u32 mask) +static inline u32 inotify_mask_to_arg(__u32 mask) { - struct inotify_user_watch *watch; - int ret; - - if (atomic_read(&dev->user->inotify_watches) >= - inotify_max_user_watches) - return -ENOSPC; - - watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); - if (unlikely(!watch)) - return -ENOMEM; - - /* save a reference to device and bump the count to make it official */ - get_inotify_dev(dev); - watch->dev = dev; - - atomic_inc(&dev->user->inotify_watches); - - inotify_init_watch(&watch->wdata); - ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask); - if (ret < 0) - free_inotify_user_watch(&watch->wdata); - - return ret; + return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | + IN_Q_OVERFLOW); } -/* Device Interface */ - +/* intofiy userspace file descriptor functions */ static unsigned int inotify_poll(struct file *file, poll_table *wait) { - struct inotify_device *dev = file->private_data; + struct fsnotify_group *group = file->private_data; int ret = 0; - poll_wait(file, &dev->wq, wait); - mutex_lock(&dev->ev_mutex); - if (!list_empty(&dev->events)) + poll_wait(file, &group->notification_waitq, wait); + mutex_lock(&group->notification_mutex); + if (!fsnotify_notify_queue_is_empty(group)) ret = POLLIN | POLLRDNORM; - mutex_unlock(&dev->ev_mutex); + mutex_unlock(&group->notification_mutex); return ret; } @@ -432,26 +145,29 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait) * enough to fit in "count". Return an error pointer if * not large enough. * - * Called with the device ev_mutex held. + * Called with the group->notification_mutex held. */ -static struct inotify_kernel_event *get_one_event(struct inotify_device *dev, - size_t count) +static struct fsnotify_event *get_one_event(struct fsnotify_group *group, + size_t count) { size_t event_size = sizeof(struct inotify_event); - struct inotify_kernel_event *kevent; + struct fsnotify_event *event; - if (list_empty(&dev->events)) + if (fsnotify_notify_queue_is_empty(group)) return NULL; - kevent = inotify_dev_get_event(dev); - if (kevent->name) - event_size += kevent->event.len; + event = fsnotify_peek_notify_event(group); + + event_size += roundup(event->name_len, event_size); if (event_size > count) return ERR_PTR(-EINVAL); - remove_kevent(dev, kevent); - return kevent; + /* held the notification_mutex the whole time, so this is the + * same event we peeked above */ + fsnotify_remove_notify_event(group); + + return event; } /* @@ -460,51 +176,90 @@ static struct inotify_kernel_event *get_one_event(struct inotify_device *dev, * We already checked that the event size is smaller than the * buffer we had in "get_one_event()" above. */ -static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent, +static ssize_t copy_event_to_user(struct fsnotify_group *group, + struct fsnotify_event *event, char __user *buf) { + struct inotify_event inotify_event; + struct fsnotify_event_private_data *fsn_priv; + struct inotify_event_private_data *priv; size_t event_size = sizeof(struct inotify_event); + size_t name_len; + + /* we get the inotify watch descriptor from the event private data */ + spin_lock(&event->lock); + fsn_priv = fsnotify_remove_priv_from_event(group, event); + spin_unlock(&event->lock); + + if (!fsn_priv) + inotify_event.wd = -1; + else { + priv = container_of(fsn_priv, struct inotify_event_private_data, + fsnotify_event_priv_data); + inotify_event.wd = priv->wd; + inotify_free_event_priv(fsn_priv); + } + + /* round up event->name_len so it is a multiple of event_size */ + name_len = roundup(event->name_len, event_size); + inotify_event.len = name_len; + + inotify_event.mask = inotify_mask_to_arg(event->mask); + inotify_event.cookie = event->sync_cookie; - if (copy_to_user(buf, &kevent->event, event_size)) + /* send the main event */ + if (copy_to_user(buf, &inotify_event, event_size)) return -EFAULT; - if (kevent->name) { - buf += event_size; + buf += event_size; - if (copy_to_user(buf, kevent->name, kevent->event.len)) + /* + * fsnotify only stores the pathname, so here we have to send the pathname + * and then pad that pathname out to a multiple of sizeof(inotify_event) + * with zeros. I get my zeros from the nul_inotify_event. + */ + if (name_len) { + unsigned int len_to_zero = name_len - event->name_len; + /* copy the path name */ + if (copy_to_user(buf, event->file_name, event->name_len)) return -EFAULT; + buf += event->name_len; - event_size += kevent->event.len; + /* fill userspace with 0's from nul_inotify_event */ + if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) + return -EFAULT; + buf += len_to_zero; + event_size += name_len; } + return event_size; } static ssize_t inotify_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { - struct inotify_device *dev; + struct fsnotify_group *group; + struct fsnotify_event *kevent; char __user *start; int ret; DEFINE_WAIT(wait); start = buf; - dev = file->private_data; + group = file->private_data; while (1) { - struct inotify_kernel_event *kevent; + prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); - - mutex_lock(&dev->ev_mutex); - kevent = get_one_event(dev, count); - mutex_unlock(&dev->ev_mutex); + mutex_lock(&group->notification_mutex); + kevent = get_one_event(group, count); + mutex_unlock(&group->notification_mutex); if (kevent) { ret = PTR_ERR(kevent); if (IS_ERR(kevent)) break; - ret = copy_event_to_user(kevent, buf); - free_kevent(kevent); + ret = copy_event_to_user(group, kevent, buf); + fsnotify_put_event(kevent); if (ret < 0) break; buf += ret; @@ -525,7 +280,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, schedule(); } - finish_wait(&dev->wq, &wait); + finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; @@ -533,25 +288,19 @@ static ssize_t inotify_read(struct file *file, char __user *buf, static int inotify_fasync(int fd, struct file *file, int on) { - struct inotify_device *dev = file->private_data; + struct fsnotify_group *group = file->private_data; - return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO; + return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO; } static int inotify_release(struct inode *ignored, struct file *file) { - struct inotify_device *dev = file->private_data; - - inotify_destroy(dev->ih); + struct fsnotify_group *group = file->private_data; - /* destroy all of the events on this device */ - mutex_lock(&dev->ev_mutex); - while (!list_empty(&dev->events)) - inotify_dev_event_dequeue(dev); - mutex_unlock(&dev->ev_mutex); + fsnotify_clear_marks_by_group(group); - /* free this device: the put matching the get in inotify_init() */ - put_inotify_dev(dev); + /* free this group, matching get was inotify_init->fsnotify_obtain_group */ + fsnotify_put_group(group); return 0; } @@ -559,16 +308,27 @@ static int inotify_release(struct inode *ignored, struct file *file) static long inotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inotify_device *dev; + struct fsnotify_group *group; + struct fsnotify_event_holder *holder; + struct fsnotify_event *event; void __user *p; int ret = -ENOTTY; + size_t send_len = 0; - dev = file->private_data; + group = file->private_data; p = (void __user *) arg; switch (cmd) { case FIONREAD: - ret = put_user(dev->queue_size, (int __user *) p); + mutex_lock(&group->notification_mutex); + list_for_each_entry(holder, &group->notification_list, event_list) { + event = holder->event; + send_len += sizeof(struct inotify_event); + send_len += roundup(event->name_len, + sizeof(struct inotify_event)); + } + mutex_unlock(&group->notification_mutex); + ret = put_user(send_len, (int __user *) p); break; } @@ -576,23 +336,233 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, } static const struct file_operations inotify_fops = { - .poll = inotify_poll, - .read = inotify_read, - .fasync = inotify_fasync, - .release = inotify_release, - .unlocked_ioctl = inotify_ioctl, + .poll = inotify_poll, + .read = inotify_read, + .fasync = inotify_fasync, + .release = inotify_release, + .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, }; -static const struct inotify_operations inotify_user_ops = { - .handle_event = inotify_dev_queue_event, - .destroy_watch = free_inotify_user_watch, -}; +/* + * find_inode - resolve a user-given path to a specific inode + */ +static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags) +{ + int error; + + error = user_path_at(AT_FDCWD, dirname, flags, path); + if (error) + return error; + /* you can only watch an inode if you have read permissions on it */ + error = inode_permission(path->dentry->d_inode, MAY_READ); + if (error) + path_put(path); + return error; +} + +/* + * When, for whatever reason, inotify is done with a mark (or what used to be a + * watch) we need to remove that watch from the idr and we need to send IN_IGNORED + * for the given wd. + * + * There is a bit of recursion here. The loop looks like: + * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry -> + * inotify_freeing_mark -> inotify_destory_mark_entry -> restart + * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets + * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup) + * test below will not call back to fsnotify again. But even if that test wasn't + * there this would still be safe since fsnotify_destroy_mark_by_entry() is + * safe from recursion. + */ +void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) +{ + struct inotify_inode_mark_entry *ientry; + struct inotify_event_private_data *event_priv; + struct fsnotify_event_private_data *fsn_event_priv; + struct fsnotify_group *egroup; + struct idr *idr; + + spin_lock(&entry->lock); + egroup = entry->group; + + /* if egroup we aren't really done and something might still send events + * for this inode, on the callback we'll send the IN_IGNORED */ + if (egroup) { + spin_unlock(&entry->lock); + fsnotify_destroy_mark_by_entry(entry); + return; + } + spin_unlock(&entry->lock); + + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); + + event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); + if (unlikely(!event_priv)) + goto skip_send_ignore; + + fsn_event_priv = &event_priv->fsnotify_event_priv_data; + + fsn_event_priv->group = group; + event_priv->wd = ientry->wd; + + fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); + + /* did the private data get added? */ + if (list_empty(&fsn_event_priv->event_list)) + inotify_free_event_priv(fsn_event_priv); + +skip_send_ignore: + + /* remove this entry from the idr */ + spin_lock(&group->inotify_data.idr_lock); + idr = &group->inotify_data.idr; + idr_remove(idr, ientry->wd); + spin_unlock(&group->inotify_data.idr_lock); + + /* removed from idr, drop that reference */ + fsnotify_put_mark(entry); +} + +/* ding dong the mark is dead */ +static void inotify_free_mark(struct fsnotify_mark_entry *entry) +{ + struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; + + kmem_cache_free(inotify_inode_mark_cachep, ientry); +} + +static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) +{ + struct fsnotify_mark_entry *entry = NULL; + struct inotify_inode_mark_entry *ientry; + int ret = 0; + int add = (arg & IN_MASK_ADD); + __u32 mask; + __u32 old_mask, new_mask; + + /* don't allow invalid bits: we don't want flags set */ + mask = inotify_arg_to_mask(arg); + if (unlikely(!mask)) + return -EINVAL; + + ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); + if (unlikely(!ientry)) + return -ENOMEM; + /* we set the mask at the end after attaching it */ + fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark); + ientry->wd = 0; + +find_entry: + spin_lock(&inode->i_lock); + entry = fsnotify_find_mark_entry(group, inode); + spin_unlock(&inode->i_lock); + if (entry) { + kmem_cache_free(inotify_inode_mark_cachep, ientry); + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); + } else { + if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) { + ret = -ENOSPC; + goto out_err; + } + + ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode); + if (ret == -EEXIST) + goto find_entry; + else if (ret) + goto out_err; + + entry = &ientry->fsn_entry; +retry: + ret = -ENOMEM; + if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) + goto out_err; + + spin_lock(&group->inotify_data.idr_lock); + /* if entry is added to the idr we keep the reference obtained + * through fsnotify_mark_add. remember to drop this reference + * when entry is removed from idr */ + ret = idr_get_new_above(&group->inotify_data.idr, entry, + ++group->inotify_data.last_wd, + &ientry->wd); + spin_unlock(&group->inotify_data.idr_lock); + if (ret) { + if (ret == -EAGAIN) + goto retry; + goto out_err; + } + atomic_inc(&group->inotify_data.user->inotify_watches); + } + + spin_lock(&entry->lock); + + old_mask = entry->mask; + if (add) { + entry->mask |= mask; + new_mask = entry->mask; + } else { + entry->mask = mask; + new_mask = entry->mask; + } + + spin_unlock(&entry->lock); + + if (old_mask != new_mask) { + /* more bits in old than in new? */ + int dropped = (old_mask & ~new_mask); + /* more bits in this entry than the inode's mask? */ + int do_inode = (new_mask & ~inode->i_fsnotify_mask); + /* more bits in this entry than the group? */ + int do_group = (new_mask & ~group->mask); + + /* update the inode with this new entry */ + if (dropped || do_inode) + fsnotify_recalc_inode_mask(inode); + + /* update the group mask with the new mask */ + if (dropped || do_group) + fsnotify_recalc_group_mask(group); + } + + return ientry->wd; + +out_err: + /* see this isn't supposed to happen, just kill the watch */ + if (entry) { + fsnotify_destroy_mark_by_entry(entry); + fsnotify_put_mark(entry); + } + return ret; +} + +static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) +{ + struct fsnotify_group *group; + unsigned int grp_num; + + /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ + grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num)); + group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops); + if (IS_ERR(group)) + return group; + + group->max_events = max_events; + + spin_lock_init(&group->inotify_data.idr_lock); + idr_init(&group->inotify_data.idr); + group->inotify_data.last_wd = 0; + group->inotify_data.user = user; + group->inotify_data.fa = NULL; + + return group; +} + + +/* inotify syscalls */ SYSCALL_DEFINE1(inotify_init1, int, flags) { - struct inotify_device *dev; - struct inotify_handle *ih; + struct fsnotify_group *group; struct user_struct *user; struct file *filp; int fd, ret; @@ -621,45 +591,27 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) goto out_free_uid; } - dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); - if (unlikely(!dev)) { - ret = -ENOMEM; + /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ + group = inotify_new_group(user, inotify_max_queued_events); + if (IS_ERR(group)) { + ret = PTR_ERR(group); goto out_free_uid; } - ih = inotify_init(&inotify_user_ops); - if (IS_ERR(ih)) { - ret = PTR_ERR(ih); - goto out_free_dev; - } - dev->ih = ih; - dev->fa = NULL; - filp->f_op = &inotify_fops; filp->f_path.mnt = mntget(inotify_mnt); filp->f_path.dentry = dget(inotify_mnt->mnt_root); filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; filp->f_mode = FMODE_READ; filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); - filp->private_data = dev; - - INIT_LIST_HEAD(&dev->events); - init_waitqueue_head(&dev->wq); - mutex_init(&dev->ev_mutex); - mutex_init(&dev->up_mutex); - dev->event_count = 0; - dev->queue_size = 0; - dev->max_events = inotify_max_queued_events; - dev->user = user; - atomic_set(&dev->count, 0); - - get_inotify_dev(dev); + filp->private_data = group; + atomic_inc(&user->inotify_devs); + fd_install(fd, filp); return fd; -out_free_dev: - kfree(dev); + out_free_uid: free_uid(user); put_filp(filp); @@ -676,8 +628,8 @@ SYSCALL_DEFINE0(inotify_init) SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, u32, mask) { + struct fsnotify_group *group; struct inode *inode; - struct inotify_device *dev; struct path path; struct file *filp; int ret, fput_needed; @@ -698,20 +650,20 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (mask & IN_ONLYDIR) flags |= LOOKUP_DIRECTORY; - ret = find_inode(pathname, &path, flags); - if (unlikely(ret)) + ret = inotify_find_inode(pathname, &path, flags); + if (ret) goto fput_and_out; - /* inode held in place by reference to path; dev by fget on fd */ + /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; - dev = filp->private_data; + group = filp->private_data; - mutex_lock(&dev->up_mutex); - ret = inotify_find_update_watch(dev->ih, inode, mask); - if (ret == -ENOENT) - ret = create_watch(dev, inode, mask); - mutex_unlock(&dev->up_mutex); + /* create/update an inode mark */ + ret = inotify_update_watch(group, inode, mask); + if (unlikely(ret)) + goto path_put_and_out; +path_put_and_out: path_put(&path); fput_and_out: fput_light(filp, fput_needed); @@ -720,9 +672,10 @@ fput_and_out: SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { + struct fsnotify_group *group; + struct fsnotify_mark_entry *entry; struct file *filp; - struct inotify_device *dev; - int ret, fput_needed; + int ret = 0, fput_needed; filp = fget_light(fd, &fput_needed); if (unlikely(!filp)) @@ -734,10 +687,20 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) goto out; } - dev = filp->private_data; + group = filp->private_data; - /* we free our watch data when we get IN_IGNORED */ - ret = inotify_rm_wd(dev->ih, wd); + spin_lock(&group->inotify_data.idr_lock); + entry = idr_find(&group->inotify_data.idr, wd); + if (unlikely(!entry)) { + spin_unlock(&group->inotify_data.idr_lock); + ret = -EINVAL; + goto out; + } + fsnotify_get_mark(entry); + spin_unlock(&group->inotify_data.idr_lock); + + inotify_destroy_mark_entry(entry, group); + fsnotify_put_mark(entry); out: fput_light(filp, fput_needed); @@ -753,9 +716,9 @@ inotify_get_sb(struct file_system_type *fs_type, int flags, } static struct file_system_type inotify_fs_type = { - .name = "inotifyfs", - .get_sb = inotify_get_sb, - .kill_sb = kill_anon_super, + .name = "inotifyfs", + .get_sb = inotify_get_sb, + .kill_sb = kill_anon_super, }; /* @@ -775,18 +738,16 @@ static int __init inotify_user_setup(void) if (IS_ERR(inotify_mnt)) panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); + inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); + event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); + inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0); + if (!inotify_ignored_event) + panic("unable to allocate the inotify ignored event\n"); + inotify_max_queued_events = 16384; inotify_max_user_instances = 128; inotify_max_user_watches = 8192; - watch_cachep = kmem_cache_create("inotify_watch_cache", - sizeof(struct inotify_user_watch), - 0, SLAB_PANIC, NULL); - event_cachep = kmem_cache_create("inotify_event_cache", - sizeof(struct inotify_kernel_event), - 0, SLAB_PANIC, NULL); - return 0; } - module_init(inotify_user_setup); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d2c0ee30e61..44848aa830d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -9,6 +9,7 @@ #ifdef __KERNEL__ +#include /* inotify uses this */ #include /* struct inode */ #include #include /* struct path */ @@ -59,6 +60,7 @@ /* listeners that hard code group numbers near the top */ #define DNOTIFY_GROUP_NUM UINT_MAX +#define INOTIFY_GROUP_NUM (DNOTIFY_GROUP_NUM-1) struct fsnotify_group; struct fsnotify_event; @@ -141,6 +143,15 @@ struct fsnotify_group { /* groups can define private fields here or use the void *private */ union { void *private; +#ifdef CONFIG_INOTIFY_USER + struct inotify_group_private_data { + spinlock_t idr_lock; + struct idr idr; + u32 last_wd; + struct fasync_struct *fa; /* async notification */ + struct user_struct *user; + } inotify_data; +#endif }; }; diff --git a/init/Kconfig b/init/Kconfig index d4e9671347e..5de1c17c51e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -302,7 +302,8 @@ config AUDITSYSCALL config AUDIT_TREE def_bool y - depends on AUDITSYSCALL && INOTIFY + depends on AUDITSYSCALL + select INOTIFY menu "RCU Subsystem" -- cgit v1.2.3-70-g09d2 From ff52cc2158b32b3b979ca7802b1fd7c70f36e13c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 11 Jun 2009 11:09:47 -0400 Subject: fsnotify: move events should indicate the event was on a child fsnotify tells its listeners explicitly when an event happened on the given inode verses on the child of the given inode. (see __fsnotify_parent) However, the semantics of fsnotify_move() are such that we deliver events directly to the two parent directories in question (old_dir and new_dir) directly without using the __fsnotify_parent() call. fsnotify should be adding FS_EVENT_ON_CHILD for the notifications to these parents. Signed-off-by: Eric Paris --- include/linux/fsnotify.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index c25b39ddd62..936f9aa8bb9 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -71,12 +71,11 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, struct inode *source = moved->d_inode; u32 in_cookie = inotify_get_cookie(); u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = 0; - __u32 new_dir_mask = 0; + __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); + __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); - if (old_dir == new_dir) { - old_dir_mask = FS_DN_RENAME; - } + if (old_dir == new_dir) + old_dir_mask |= FS_DN_RENAME; if (isdir) { isdir = IN_ISDIR; @@ -84,9 +83,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_IN_ISDIR; } - old_dir_mask |= FS_MOVED_FROM; - new_dir_mask |= FS_MOVED_TO; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name, source); inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name, -- cgit v1.2.3-70-g09d2 From 63b852a6b67d0820d388b0ecd0da83ccb4048b8d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:24 +0000 Subject: asm-generic: rename termios.h, signal.h and mman.h The existing asm-generic versions are incomplete and included by some architectures. New architectures should be able to use a generic version, so rename the existing files and change all users, which lets us add the new files. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/signal.h | 2 +- arch/arm/include/asm/mman.h | 2 +- arch/arm/include/asm/signal.h | 2 +- arch/avr32/include/asm/mman.h | 2 +- arch/avr32/include/asm/signal.h | 2 +- arch/avr32/include/asm/termios.h | 2 +- arch/blackfin/include/asm/signal.h | 2 +- arch/cris/include/asm/mman.h | 2 +- arch/cris/include/asm/signal.h | 2 +- arch/frv/include/asm/mman.h | 2 +- arch/frv/include/asm/termios.h | 2 +- arch/h8300/include/asm/mman.h | 2 +- arch/h8300/include/asm/signal.h | 2 +- arch/ia64/include/asm/mman.h | 2 +- arch/ia64/include/asm/signal.h | 2 +- arch/m32r/include/asm/mman.h | 2 +- arch/m32r/include/asm/signal.h | 2 +- arch/m68k/include/asm/mman.h | 2 +- arch/m68k/include/asm/signal.h | 2 +- arch/microblaze/include/asm/signal.h | 2 +- arch/microblaze/include/asm/termios.h | 2 +- arch/mips/include/asm/signal.h | 2 +- arch/mn10300/include/asm/mman.h | 2 +- arch/mn10300/include/asm/signal.h | 2 +- arch/powerpc/include/asm/mman.h | 2 +- arch/powerpc/include/asm/signal.h | 2 +- arch/powerpc/include/asm/termios.h | 2 +- arch/s390/include/asm/mman.h | 2 +- arch/s390/include/asm/signal.h | 2 +- arch/s390/include/asm/termios.h | 2 +- arch/sh/include/asm/mman.h | 2 +- arch/sh/include/asm/signal.h | 2 +- arch/sparc/include/asm/mman.h | 2 +- arch/sparc/include/asm/signal.h | 2 +- arch/x86/include/asm/mman.h | 2 +- arch/x86/include/asm/signal.h | 2 +- include/asm-generic/Kbuild | 4 +- include/asm-generic/mman-common.h | 41 +++++++++++++++++++ include/asm-generic/mman.h | 41 ------------------- include/asm-generic/signal-defs.h | 28 +++++++++++++ include/asm-generic/signal.h | 28 ------------- include/asm-generic/termios-base.h | 77 +++++++++++++++++++++++++++++++++++ include/asm-generic/termios.h | 77 ----------------------------------- 43 files changed, 184 insertions(+), 184 deletions(-) create mode 100644 include/asm-generic/mman-common.h delete mode 100644 include/asm-generic/mman.h create mode 100644 include/asm-generic/signal-defs.h delete mode 100644 include/asm-generic/signal.h create mode 100644 include/asm-generic/termios-base.h delete mode 100644 include/asm-generic/termios.h (limited to 'include') diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h index 13c2305d35e..a9388300abb 100644 --- a/arch/alpha/include/asm/signal.h +++ b/arch/alpha/include/asm/signal.h @@ -111,7 +111,7 @@ typedef unsigned long sigset_t; #define SIG_UNBLOCK 2 /* for unblocking signals */ #define SIG_SETMASK 3 /* for setting the signal mask */ -#include +#include #ifdef __KERNEL__ struct osf_sigaction { diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h index 54570d2e95b..fc26976d8e3 100644 --- a/arch/arm/include/asm/mman.h +++ b/arch/arm/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h index d0fb487aba4..43ba0fb1c8a 100644 --- a/arch/arm/include/asm/signal.h +++ b/arch/arm/include/asm/signal.h @@ -111,7 +111,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h index 648f91e7187..9a92b15f6a6 100644 --- a/arch/avr32/include/asm/mman.h +++ b/arch/avr32/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __ASM_AVR32_MMAN_H__ #define __ASM_AVR32_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h index caffefeeba1..8790dfc10d5 100644 --- a/arch/avr32/include/asm/signal.h +++ b/arch/avr32/include/asm/signal.h @@ -112,7 +112,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/avr32/include/asm/termios.h b/arch/avr32/include/asm/termios.h index 0152aba3515..dd7e9da2548 100644 --- a/arch/avr32/include/asm/termios.h +++ b/arch/avr32/include/asm/termios.h @@ -55,7 +55,7 @@ struct termio { */ #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" -#include +#include #endif /* __KERNEL__ */ diff --git a/arch/blackfin/include/asm/signal.h b/arch/blackfin/include/asm/signal.h index 87951d25145..2eea9079445 100644 --- a/arch/blackfin/include/asm/signal.h +++ b/arch/blackfin/include/asm/signal.h @@ -104,7 +104,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h index 1c35e1b66b4..b7f0afba3ce 100644 --- a/arch/cris/include/asm/mman.h +++ b/arch/cris/include/asm/mman.h @@ -3,7 +3,7 @@ /* verbatim copy of asm-i386/ version */ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h index 349ae682b56..ea6af9aad76 100644 --- a/arch/cris/include/asm/signal.h +++ b/arch/cris/include/asm/signal.h @@ -106,7 +106,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h index b4371e92868..58c1d11e2ac 100644 --- a/arch/frv/include/asm/mman.h +++ b/arch/frv/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/frv/include/asm/termios.h b/arch/frv/include/asm/termios.h index a62fb587237..b4868aafe79 100644 --- a/arch/frv/include/asm/termios.h +++ b/arch/frv/include/asm/termios.h @@ -52,7 +52,7 @@ struct termio { /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ #ifdef __KERNEL__ -#include +#include #endif #endif /* _ASM_TERMIOS_H */ diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h index b9f104f22a3..cf35f0a6f12 100644 --- a/arch/h8300/include/asm/mman.h +++ b/arch/h8300/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __H8300_MMAN_H__ #define __H8300_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h index 7bc15048a64..fd8b66e40dc 100644 --- a/arch/h8300/include/asm/signal.h +++ b/arch/h8300/include/asm/signal.h @@ -105,7 +105,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h index c73b87832a1..48cf8b98a0b 100644 --- a/arch/ia64/include/asm/mman.h +++ b/arch/ia64/include/asm/mman.h @@ -8,7 +8,7 @@ * David Mosberger-Tang , Hewlett-Packard Co */ -#include +#include #define MAP_GROWSDOWN 0x00100 /* stack-like segment */ #define MAP_GROWSUP 0x00200 /* register stack-like segment */ diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h index 4f5ca5643cb..b166248d49a 100644 --- a/arch/ia64/include/asm/signal.h +++ b/arch/ia64/include/asm/signal.h @@ -114,7 +114,7 @@ #endif /* __KERNEL__ */ -#include +#include # ifndef __ASSEMBLY__ diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h index 516a8973b13..04a5f40aa40 100644 --- a/arch/m32r/include/asm/mman.h +++ b/arch/m32r/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __M32R_MMAN_H__ #define __M32R_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index 1a607066bc6..9c1acb2b1a9 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -107,7 +107,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h index 1626d37f489..9f5c4c4b3c7 100644 --- a/arch/m68k/include/asm/mman.h +++ b/arch/m68k/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __M68K_MMAN_H__ #define __M68K_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h index 08788fdefde..5bc09c787a1 100644 --- a/arch/m68k/include/asm/signal.h +++ b/arch/m68k/include/asm/signal.h @@ -103,7 +103,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/microblaze/include/asm/signal.h b/arch/microblaze/include/asm/signal.h index 9676fad3486..46bc2267d94 100644 --- a/arch/microblaze/include/asm/signal.h +++ b/arch/microblaze/include/asm/signal.h @@ -90,7 +90,7 @@ # ifndef __ASSEMBLY__ # include -# include +# include /* Avoid too many header ordering problems. */ struct siginfo; diff --git a/arch/microblaze/include/asm/termios.h b/arch/microblaze/include/asm/termios.h index 102d7725866..47a46d1fbe2 100644 --- a/arch/microblaze/include/asm/termios.h +++ b/arch/microblaze/include/asm/termios.h @@ -81,7 +81,7 @@ struct termio { #ifdef __KERNEL__ -#include +#include #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h index bee5153aca4..c783f364938 100644 --- a/arch/mips/include/asm/signal.h +++ b/arch/mips/include/asm/signal.h @@ -109,7 +109,7 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SIG_UNBLOCK 2 /* for unblocking signals */ #define SIG_SETMASK 3 /* for setting the signal mask */ -#include +#include struct sigaction { unsigned int sa_flags; diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h index b7986b65add..d04fac1da5a 100644 --- a/arch/mn10300/include/asm/mman.h +++ b/arch/mn10300/include/asm/mman.h @@ -12,7 +12,7 @@ #ifndef _ASM_MMAN_H #define _ASM_MMAN_H -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h index e98817cec5f..7e891fce237 100644 --- a/arch/mn10300/include/asm/signal.h +++ b/arch/mn10300/include/asm/signal.h @@ -115,7 +115,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index e7b99bac9f4..7b1c49811a2 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef _ASM_POWERPC_MMAN_H #define _ASM_POWERPC_MMAN_H -#include +#include /* * This program is free software; you can redistribute it and/or diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 69f709d8e8e..3eb13be11d8 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -94,7 +94,7 @@ typedef struct { #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include struct old_sigaction { __sighandler_t sa_handler; diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h index 2c14fea07c8..a24f48704a3 100644 --- a/arch/powerpc/include/asm/termios.h +++ b/arch/powerpc/include/asm/termios.h @@ -78,7 +78,7 @@ struct termio { #ifdef __KERNEL__ -#include +#include #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index da01432e8f4..f63fe7b431e 100644 --- a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -9,7 +9,7 @@ #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h index f6cfddb278c..cdf5cb2fe03 100644 --- a/arch/s390/include/asm/signal.h +++ b/arch/s390/include/asm/signal.h @@ -115,7 +115,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h index 67f66278f53..bc3a35cefc9 100644 --- a/arch/s390/include/asm/termios.h +++ b/arch/s390/include/asm/termios.h @@ -60,7 +60,7 @@ struct termio { #define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) -#include +#include #endif /* __KERNEL__ */ diff --git a/arch/sh/include/asm/mman.h b/arch/sh/include/asm/mman.h index 156eb0225cf..7d8b72c91a5 100644 --- a/arch/sh/include/asm/mman.h +++ b/arch/sh/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __ASM_SH_MMAN_H #define __ASM_SH_MMAN_H -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/sh/include/asm/signal.h b/arch/sh/include/asm/signal.h index 5c5c1e85208..9cc5f014468 100644 --- a/arch/sh/include/asm/signal.h +++ b/arch/sh/include/asm/signal.h @@ -106,7 +106,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index fdfbbf0a473..988192e8e95 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef __SPARC_MMAN_H__ #define __SPARC_MMAN_H__ -#include +#include /* SunOS'ified... */ diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h index cba45206b7f..e49b828a247 100644 --- a/arch/sparc/include/asm/signal.h +++ b/arch/sparc/include/asm/signal.h @@ -176,7 +176,7 @@ struct sigstack { #define SA_STATIC_ALLOC 0x8000 #endif -#include +#include struct __new_sigaction { __sighandler_t sa_handler; diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 90bc4108a4f..751af2550ed 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_MMAN_H #define _ASM_X86_MMAN_H -#include +#include #define MAP_32BIT 0x40 /* only give out 32bit addresses */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 7761a5d554b..598457cbd0f 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -117,7 +117,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include #ifndef __ASSEMBLY__ diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 4c9932a2503..460b08d51e2 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -2,9 +2,9 @@ header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h -header-y += mman.h +header-y += mman-common.h header-y += poll.h -header-y += signal.h +header-y += signal-defs.h header-y += statfs.h unifdef-y += int-l64.h diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h new file mode 100644 index 00000000000..3b69ad34189 --- /dev/null +++ b/include/asm-generic/mman-common.h @@ -0,0 +1,41 @@ +#ifndef __ASM_GENERIC_MMAN_COMMON_H +#define __ASM_GENERIC_MMAN_COMMON_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h deleted file mode 100644 index 5e3dde2ee5a..00000000000 --- a/include/asm-generic/mman.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_GENERIC_MMAN_H -#define _ASM_GENERIC_MMAN_H - -/* - Author: Michael S. Tsirkin , Mellanox Technologies Ltd. - Based on: asm-xxx/mman.h -*/ - -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ - -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - -#define MADV_NORMAL 0 /* no further special treatment */ -#define MADV_RANDOM 1 /* expect random page references */ -#define MADV_SEQUENTIAL 2 /* expect sequential page references */ -#define MADV_WILLNEED 3 /* will need these pages */ -#define MADV_DONTNEED 4 /* don't need these pages */ - -/* common parameters: try to keep these consistent across architectures */ -#define MADV_REMOVE 9 /* remove these pages & resources */ -#define MADV_DONTFORK 10 /* don't inherit across fork */ -#define MADV_DOFORK 11 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_FILE 0 - -#endif diff --git a/include/asm-generic/signal-defs.h b/include/asm-generic/signal-defs.h new file mode 100644 index 00000000000..00f95df5429 --- /dev/null +++ b/include/asm-generic/signal-defs.h @@ -0,0 +1,28 @@ +#ifndef __ASM_GENERIC_SIGNAL_DEFS_H +#define __ASM_GENERIC_SIGNAL_DEFS_H + +#include + +#ifndef SIG_BLOCK +#define SIG_BLOCK 0 /* for blocking signals */ +#endif +#ifndef SIG_UNBLOCK +#define SIG_UNBLOCK 1 /* for unblocking signals */ +#endif +#ifndef SIG_SETMASK +#define SIG_SETMASK 2 /* for setting the signal mask */ +#endif + +#ifndef __ASSEMBLY__ +typedef void __signalfn_t(int); +typedef __signalfn_t __user *__sighandler_t; + +typedef void __restorefn_t(void); +typedef __restorefn_t __user *__sigrestore_t; + +#define SIG_DFL ((__force __sighandler_t)0) /* default signal handling */ +#define SIG_IGN ((__force __sighandler_t)1) /* ignore signal */ +#define SIG_ERR ((__force __sighandler_t)-1) /* error return from signal */ +#endif + +#endif /* __ASM_GENERIC_SIGNAL_DEFS_H */ diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h deleted file mode 100644 index dae1d872007..00000000000 --- a/include/asm-generic/signal.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __ASM_GENERIC_SIGNAL_H -#define __ASM_GENERIC_SIGNAL_H - -#include - -#ifndef SIG_BLOCK -#define SIG_BLOCK 0 /* for blocking signals */ -#endif -#ifndef SIG_UNBLOCK -#define SIG_UNBLOCK 1 /* for unblocking signals */ -#endif -#ifndef SIG_SETMASK -#define SIG_SETMASK 2 /* for setting the signal mask */ -#endif - -#ifndef __ASSEMBLY__ -typedef void __signalfn_t(int); -typedef __signalfn_t __user *__sighandler_t; - -typedef void __restorefn_t(void); -typedef __restorefn_t __user *__sigrestore_t; - -#define SIG_DFL ((__force __sighandler_t)0) /* default signal handling */ -#define SIG_IGN ((__force __sighandler_t)1) /* ignore signal */ -#define SIG_ERR ((__force __sighandler_t)-1) /* error return from signal */ -#endif - -#endif /* __ASM_GENERIC_SIGNAL_H */ diff --git a/include/asm-generic/termios-base.h b/include/asm-generic/termios-base.h new file mode 100644 index 00000000000..0a769feb22b --- /dev/null +++ b/include/asm-generic/termios-base.h @@ -0,0 +1,77 @@ +/* termios.h: generic termios/termio user copying/translation + */ + +#ifndef _ASM_GENERIC_TERMIOS_BASE_H +#define _ASM_GENERIC_TERMIOS_BASE_H + +#include + +#ifndef __ARCH_TERMIO_GETPUT + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +static inline int user_termio_to_kernel_termios(struct ktermios *termios, + struct termio __user *termio) +{ + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; +} + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +static inline int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; +} + +#ifndef user_termios_to_kernel_termios +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) +#endif + +#ifndef kernel_termios_to_user_termios +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) +#endif + +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __ARCH_TERMIO_GETPUT */ + +#endif /* _ASM_GENERIC_TERMIOS_BASE_H */ diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h deleted file mode 100644 index 7d39ecc92d9..00000000000 --- a/include/asm-generic/termios.h +++ /dev/null @@ -1,77 +0,0 @@ -/* termios.h: generic termios/termio user copying/translation - */ - -#ifndef _ASM_GENERIC_TERMIOS_H -#define _ASM_GENERIC_TERMIOS_H - -#include - -#ifndef __ARCH_TERMIO_GETPUT - -/* - * Translate a "termio" structure into a "termios". Ugh. - */ -static inline int user_termio_to_kernel_termios(struct ktermios *termios, - struct termio __user *termio) -{ - unsigned short tmp; - - if (get_user(tmp, &termio->c_iflag) < 0) - goto fault; - termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; - - if (get_user(tmp, &termio->c_oflag) < 0) - goto fault; - termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; - - if (get_user(tmp, &termio->c_cflag) < 0) - goto fault; - termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; - - if (get_user(tmp, &termio->c_lflag) < 0) - goto fault; - termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; - - if (get_user(termios->c_line, &termio->c_line) < 0) - goto fault; - - if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) - goto fault; - - return 0; - - fault: - return -EFAULT; -} - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) -{ - if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || - put_user(termios->c_oflag, &termio->c_oflag) < 0 || - put_user(termios->c_cflag, &termio->c_cflag) < 0 || - put_user(termios->c_lflag, &termio->c_lflag) < 0 || - put_user(termios->c_line, &termio->c_line) < 0 || - copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) - return -EFAULT; - - return 0; -} - -#ifndef user_termios_to_kernel_termios -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) -#endif - -#ifndef kernel_termios_to_user_termios -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif - -#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) - -#endif /* __ARCH_TERMIO_GETPUT */ - -#endif /* _ASM_GENERIC_TERMIOS_H */ -- cgit v1.2.3-70-g09d2 From c31ae4bb4a9fa4606a74c0a4fb61b74f804e861e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:25 +0000 Subject: asm-generic: introduce asm/bitsperlong.h This provides a reliable way for asm-generic/types.h and other files to find out if it is running on a 32 or 64 bit platform. We cannot use CONFIG_64BIT for this in headers that are included from user space because CONFIG symbols are not available there. We also cannot do it inside of asm/types.h because some headers need the word size but cannot include types.h. The solution is to introduce a new header that defines both __BITS_PER_LONG for user space and BITS_PER_LONG for usage in the kernel. The asm-generic version falls back to 32 bit unless the architecture overrides it, which I did for all 64 bit platforms. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/bitsperlong.h | 8 ++++++++ arch/alpha/include/asm/types.h | 3 --- arch/arm/include/asm/bitsperlong.h | 1 + arch/avr32/include/asm/bitsperlong.h | 1 + arch/blackfin/include/asm/bitsperlong.h | 1 + arch/cris/include/asm/bitsperlong.h | 1 + arch/frv/include/asm/bitsperlong.h | 1 + arch/h8300/include/asm/bitsperlong.h | 1 + arch/ia64/include/asm/bitsperlong.h | 8 ++++++++ arch/ia64/include/asm/types.h | 7 ------- arch/m32r/include/asm/bitsperlong.h | 1 + arch/m68k/include/asm/bitsperlong.h | 1 + arch/microblaze/include/asm/bitsperlong.h | 1 + arch/mips/include/asm/bitsperlong.h | 8 ++++++++ arch/mips/include/asm/types.h | 3 --- arch/mn10300/include/asm/bitsperlong.h | 1 + arch/parisc/include/asm/bitsperlong.h | 20 +++++++++++++++++++ arch/parisc/include/asm/types.h | 8 -------- arch/powerpc/include/asm/bitsperlong.h | 12 ++++++++++++ arch/powerpc/include/asm/types.h | 9 --------- arch/s390/include/asm/bitsperlong.h | 13 +++++++++++++ arch/s390/include/asm/types.h | 6 ------ arch/sh/include/asm/bitsperlong.h | 1 + arch/sparc/include/asm/bitsperlong.h | 13 +++++++++++++ arch/sparc/include/asm/types.h | 4 ---- arch/x86/include/asm/bitsperlong.h | 13 +++++++++++++ arch/x86/include/asm/types.h | 6 ------ arch/xtensa/include/asm/bitsperlong.h | 1 + include/asm-generic/Kbuild | 1 + include/asm-generic/Kbuild.asm | 1 + include/asm-generic/bitsperlong.h | 32 +++++++++++++++++++++++++++++++ include/asm-generic/int-l64.h | 2 ++ include/asm-generic/int-ll64.h | 2 ++ 33 files changed, 145 insertions(+), 46 deletions(-) create mode 100644 arch/alpha/include/asm/bitsperlong.h create mode 100644 arch/arm/include/asm/bitsperlong.h create mode 100644 arch/avr32/include/asm/bitsperlong.h create mode 100644 arch/blackfin/include/asm/bitsperlong.h create mode 100644 arch/cris/include/asm/bitsperlong.h create mode 100644 arch/frv/include/asm/bitsperlong.h create mode 100644 arch/h8300/include/asm/bitsperlong.h create mode 100644 arch/ia64/include/asm/bitsperlong.h create mode 100644 arch/m32r/include/asm/bitsperlong.h create mode 100644 arch/m68k/include/asm/bitsperlong.h create mode 100644 arch/microblaze/include/asm/bitsperlong.h create mode 100644 arch/mips/include/asm/bitsperlong.h create mode 100644 arch/mn10300/include/asm/bitsperlong.h create mode 100644 arch/parisc/include/asm/bitsperlong.h create mode 100644 arch/powerpc/include/asm/bitsperlong.h create mode 100644 arch/s390/include/asm/bitsperlong.h create mode 100644 arch/sh/include/asm/bitsperlong.h create mode 100644 arch/sparc/include/asm/bitsperlong.h create mode 100644 arch/x86/include/asm/bitsperlong.h create mode 100644 arch/xtensa/include/asm/bitsperlong.h create mode 100644 include/asm-generic/bitsperlong.h (limited to 'include') diff --git a/arch/alpha/include/asm/bitsperlong.h b/arch/alpha/include/asm/bitsperlong.h new file mode 100644 index 00000000000..ad57f786820 --- /dev/null +++ b/arch/alpha/include/asm/bitsperlong.h @@ -0,0 +1,8 @@ +#ifndef __ASM_ALPHA_BITSPERLONG_H +#define __ASM_ALPHA_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include + +#endif /* __ASM_ALPHA_BITSPERLONG_H */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index f072f344497..bd621ecd1eb 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -25,9 +25,6 @@ typedef unsigned int umode_t; * These aren't exported outside the kernel to avoid name space clashes */ #ifdef __KERNEL__ - -#define BITS_PER_LONG 64 - #ifndef __ASSEMBLY__ typedef u64 dma_addr_t; diff --git a/arch/arm/include/asm/bitsperlong.h b/arch/arm/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/arm/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/avr32/include/asm/bitsperlong.h b/arch/avr32/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/avr32/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/blackfin/include/asm/bitsperlong.h b/arch/blackfin/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/blackfin/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/cris/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/include/asm/bitsperlong.h b/arch/frv/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/frv/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/h8300/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/bitsperlong.h b/arch/ia64/include/asm/bitsperlong.h new file mode 100644 index 00000000000..ec4db3c970b --- /dev/null +++ b/arch/ia64/include/asm/bitsperlong.h @@ -0,0 +1,8 @@ +#ifndef __ASM_IA64_BITSPERLONG_H +#define __ASM_IA64_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include + +#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h index e36b3716e71..fbf1ed3b44c 100644 --- a/arch/ia64/include/asm/types.h +++ b/arch/ia64/include/asm/types.h @@ -19,10 +19,6 @@ # define __IA64_UL(x) (x) # define __IA64_UL_CONST(x) x -# ifdef __KERNEL__ -# define BITS_PER_LONG 64 -# endif - #else # define __IA64_UL(x) ((unsigned long)(x)) # define __IA64_UL_CONST(x) x##UL @@ -34,10 +30,7 @@ typedef unsigned int umode_t; */ # ifdef __KERNEL__ -#define BITS_PER_LONG 64 - /* DMA addresses are 64-bits wide, in general. */ - typedef u64 dma_addr_t; # endif /* __KERNEL__ */ diff --git a/arch/m32r/include/asm/bitsperlong.h b/arch/m32r/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/m32r/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/m68k/include/asm/bitsperlong.h b/arch/m68k/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/m68k/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/microblaze/include/asm/bitsperlong.h b/arch/microblaze/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/microblaze/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/bitsperlong.h b/arch/mips/include/asm/bitsperlong.h new file mode 100644 index 00000000000..3e4c10a8e78 --- /dev/null +++ b/arch/mips/include/asm/bitsperlong.h @@ -0,0 +1,8 @@ +#ifndef __ASM_MIPS_BITSPERLONG_H +#define __ASM_MIPS_BITSPERLONG_H + +#define __BITS_PER_LONG _MIPS_SZLONG + +#include + +#endif /* __ASM_MIPS_BITSPERLONG_H */ diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h index 7956e69a3bd..544a2854598 100644 --- a/arch/mips/include/asm/types.h +++ b/arch/mips/include/asm/types.h @@ -31,9 +31,6 @@ typedef unsigned short umode_t; * These aren't exported outside the kernel to avoid name space clashes */ #ifdef __KERNEL__ - -#define BITS_PER_LONG _MIPS_SZLONG - #ifndef __ASSEMBLY__ #if (defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) \ diff --git a/arch/mn10300/include/asm/bitsperlong.h b/arch/mn10300/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/mn10300/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/bitsperlong.h b/arch/parisc/include/asm/bitsperlong.h new file mode 100644 index 00000000000..75196b415d3 --- /dev/null +++ b/arch/parisc/include/asm/bitsperlong.h @@ -0,0 +1,20 @@ +#ifndef __ASM_PARISC_BITSPERLONG_H +#define __ASM_PARISC_BITSPERLONG_H + +/* + * using CONFIG_* outside of __KERNEL__ is wrong, + * __LP64__ was also removed from headers, so what + * is the right approach on parisc? + * -arnd + */ +#if (defined(__KERNEL__) && defined(CONFIG_64BIT)) || defined (__LP64__) +#define __BITS_PER_LONG 64 +#define SHIFT_PER_LONG 6 +#else +#define __BITS_PER_LONG 32 +#define SHIFT_PER_LONG 5 +#endif + +#include + +#endif /* __ASM_PARISC_BITSPERLONG_H */ diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h index 7f5a39bfb4c..20135cc8003 100644 --- a/arch/parisc/include/asm/types.h +++ b/arch/parisc/include/asm/types.h @@ -14,14 +14,6 @@ typedef unsigned short umode_t; */ #ifdef __KERNEL__ -#ifdef CONFIG_64BIT -#define BITS_PER_LONG 64 -#define SHIFT_PER_LONG 6 -#else -#define BITS_PER_LONG 32 -#define SHIFT_PER_LONG 5 -#endif - #ifndef __ASSEMBLY__ /* Dma addresses are 32-bits wide. */ diff --git a/arch/powerpc/include/asm/bitsperlong.h b/arch/powerpc/include/asm/bitsperlong.h new file mode 100644 index 00000000000..5f1659032c4 --- /dev/null +++ b/arch/powerpc/include/asm/bitsperlong.h @@ -0,0 +1,12 @@ +#ifndef __ASM_POWERPC_BITSPERLONG_H +#define __ASM_POWERPC_BITSPERLONG_H + +#if defined(__powerpc64__) +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_POWERPC_BITSPERLONG_H */ diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index 7ce27a52bb3..a5aea0ca34e 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -40,15 +40,6 @@ typedef struct { #endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#ifdef __powerpc64__ -#define BITS_PER_LONG 64 -#else -#define BITS_PER_LONG 32 -#endif - #ifndef __ASSEMBLY__ typedef __vector128 vector128; diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6b235aea9c6 --- /dev/null +++ b/arch/s390/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_S390_BITSPERLONG_H +#define __ASM_S390_BITSPERLONG_H + +#ifndef __s390x__ +#define __BITS_PER_LONG 32 +#else +#define __BITS_PER_LONG 64 +#endif + +#include + +#endif /* __ASM_S390_BITSPERLONG_H */ + diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index 3dc3fc22881..04d6b95a89c 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -28,12 +28,6 @@ typedef __signed__ long saddr_t; */ #ifdef __KERNEL__ -#ifndef __s390x__ -#define BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif - #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; diff --git a/arch/sh/include/asm/bitsperlong.h b/arch/sh/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/sh/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/arch/sparc/include/asm/bitsperlong.h b/arch/sparc/include/asm/bitsperlong.h new file mode 100644 index 00000000000..40dcaa3aaa5 --- /dev/null +++ b/arch/sparc/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_ALPHA_BITSPERLONG_H +#define __ASM_ALPHA_BITSPERLONG_H + +#if defined(__sparc__) && defined(__arch64__) +#define __BITS_PER_LONG 64 +#else +#define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_ALPHA_BITSPERLONG_H */ + diff --git a/arch/sparc/include/asm/types.h b/arch/sparc/include/asm/types.h index 2237118825d..de671d73bae 100644 --- a/arch/sparc/include/asm/types.h +++ b/arch/sparc/include/asm/types.h @@ -21,8 +21,6 @@ typedef unsigned short umode_t; #ifdef __KERNEL__ -#define BITS_PER_LONG 64 - #ifndef __ASSEMBLY__ /* Dma addresses come in generic and 64-bit flavours. */ @@ -46,8 +44,6 @@ typedef unsigned short umode_t; #ifdef __KERNEL__ -#define BITS_PER_LONG 32 - #ifndef __ASSEMBLY__ typedef u32 dma_addr_t; diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h new file mode 100644 index 00000000000..b0ae1c4dc79 --- /dev/null +++ b/arch/x86/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_X86_BITSPERLONG_H +#define __ASM_X86_BITSPERLONG_H + +#ifdef __x86_64__ +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_X86_BITSPERLONG_H */ + diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index e6f73632007..09b97745772 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h @@ -14,12 +14,6 @@ typedef unsigned short umode_t; */ #ifdef __KERNEL__ -#ifdef CONFIG_X86_32 -# define BITS_PER_LONG 32 -#else -# define BITS_PER_LONG 64 -#endif - #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; diff --git a/arch/xtensa/include/asm/bitsperlong.h b/arch/xtensa/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6dc0bb0c13b --- /dev/null +++ b/arch/xtensa/include/asm/bitsperlong.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 460b08d51e2..cbb437875f5 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,3 +1,4 @@ +header-y += bitsperlong.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 70d185534b9..290910e4ede 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -9,6 +9,7 @@ unifdef-y += a.out.h endif unifdef-y += auxvec.h unifdef-y += byteorder.h +unifdef-y += bitsperlong.h unifdef-y += errno.h unifdef-y += fcntl.h unifdef-y += ioctl.h diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h new file mode 100644 index 00000000000..4ae54e07de8 --- /dev/null +++ b/include/asm-generic/bitsperlong.h @@ -0,0 +1,32 @@ +#ifndef __ASM_GENERIC_BITS_PER_LONG +#define __ASM_GENERIC_BITS_PER_LONG + +/* + * There seems to be no way of detecting this automatically from user + * space, so 64 bit architectures should override this in their + * bitsperlong.h. In particular, an architecture that supports + * both 32 and 64 bit user space must not rely on CONFIG_64BIT + * to decide it, but rather check a compiler provided macro. + */ +#ifndef __BITS_PER_LONG +#define __BITS_PER_LONG 32 +#endif + +#ifdef __KERNEL__ + +#ifdef CONFIG_64BIT +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif /* CONFIG_64BIT */ + +/* + * FIXME: The check currently breaks x86-64 build, so it's + * temporarily disabled. Please fix x86-64 and reenable + */ +#if 0 && BITS_PER_LONG != __BITS_PER_LONG +#error Inconsistent word size. Check asm/bitsperlong.h +#endif + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h index 2af9b75d77d..1ca3efc976c 100644 --- a/include/asm-generic/int-l64.h +++ b/include/asm-generic/int-l64.h @@ -8,6 +8,8 @@ #ifndef _ASM_GENERIC_INT_L64_H #define _ASM_GENERIC_INT_L64_H +#include + #ifndef __ASSEMBLY__ /* * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h index f9bc9ac29b3..f394147c073 100644 --- a/include/asm-generic/int-ll64.h +++ b/include/asm-generic/int-ll64.h @@ -8,6 +8,8 @@ #ifndef _ASM_GENERIC_INT_LL64_H #define _ASM_GENERIC_INT_LL64_H +#include + #ifndef __ASSEMBLY__ /* * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the -- cgit v1.2.3-70-g09d2 From 2864d32be31a20a4617e37a857dd9915a57e2efb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:26 +0000 Subject: asm-generic: add generic sysv ipc headers The ipc64 data structures were originally meant to be architecture specific so that each architecture could add their own optimizations for padding. In the end, most of them just copied the x86 version, and most got that wrong. UClibc expects the x86 anyway, so we might just declare that the default and get rid of the extra copies. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 5 ++++ include/asm-generic/ipcbuf.h | 34 ++++++++++++++++++++++++ include/asm-generic/msgbuf.h | 47 +++++++++++++++++++++++++++++++++ include/asm-generic/sembuf.h | 38 +++++++++++++++++++++++++++ include/asm-generic/shmbuf.h | 59 ++++++++++++++++++++++++++++++++++++++++++ include/asm-generic/shmparam.h | 6 +++++ 6 files changed, 189 insertions(+) create mode 100644 include/asm-generic/ipcbuf.h create mode 100644 include/asm-generic/msgbuf.h create mode 100644 include/asm-generic/sembuf.h create mode 100644 include/asm-generic/shmbuf.h create mode 100644 include/asm-generic/shmparam.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index cbb437875f5..fd7a9c49df4 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -3,8 +3,13 @@ header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h +header-y += ipcbuf.h header-y += mman-common.h +header-y += msgbuf.h header-y += poll.h +header-y += sembuf.h +header-y += shmbuf.h +header-y += shmparam.h header-y += signal-defs.h header-y += statfs.h diff --git a/include/asm-generic/ipcbuf.h b/include/asm-generic/ipcbuf.h new file mode 100644 index 00000000000..76982b2a1b5 --- /dev/null +++ b/include/asm-generic/ipcbuf.h @@ -0,0 +1,34 @@ +#ifndef __ASM_GENERIC_IPCBUF_H +#define __ASM_GENERIC_IPCBUF_H + +/* + * The generic ipc64_perm structure: + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * ipc64_perm was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * Pad space is left for: + * - 32-bit mode_t on architectures that only had 16 bit + * - 32-bit seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm { + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + /* pad if mode_t is u16: */ + unsigned char __pad1[4 - sizeof(__kernel_mode_t)]; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_GENERIC_IPCBUF_H */ diff --git a/include/asm-generic/msgbuf.h b/include/asm-generic/msgbuf.h new file mode 100644 index 00000000000..aec850d9159 --- /dev/null +++ b/include/asm-generic/msgbuf.h @@ -0,0 +1,47 @@ +#ifndef __ASM_GENERIC_MSGBUF_H +#define __ASM_GENERIC_MSGBUF_H + +#include +/* + * generic msqid64_ds structure. + * + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * msqid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first three padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t msg_rtime; /* last msgrcv time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + __kernel_time_t msg_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused3; +#endif + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* __ASM_GENERIC_MSGBUF_H */ diff --git a/include/asm-generic/sembuf.h b/include/asm-generic/sembuf.h new file mode 100644 index 00000000000..4cb2c13e509 --- /dev/null +++ b/include/asm-generic/sembuf.h @@ -0,0 +1,38 @@ +#ifndef __ASM_GENERIC_SEMBUF_H +#define __ASM_GENERIC_SEMBUF_H + +#include + +/* + * The semid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * semid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t sem_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_GENERIC_SEMBUF_H */ diff --git a/include/asm-generic/shmbuf.h b/include/asm-generic/shmbuf.h new file mode 100644 index 00000000000..5768fa60ac8 --- /dev/null +++ b/include/asm-generic/shmbuf.h @@ -0,0 +1,59 @@ +#ifndef __ASM_GENERIC_SHMBUF_H +#define __ASM_GENERIC_SHMBUF_H + +#include + +/* + * The shmid64_ds structure for x86 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * shmid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. + * + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused1; +#endif + __kernel_time_t shm_dtime; /* last detach time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused2; +#endif + __kernel_time_t shm_ctime; /* last change time */ +#if __BITS_PER_LONG != 64 + unsigned long __unused3; +#endif + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_GENERIC_SHMBUF_H */ diff --git a/include/asm-generic/shmparam.h b/include/asm-generic/shmparam.h new file mode 100644 index 00000000000..51a3852de73 --- /dev/null +++ b/include/asm-generic/shmparam.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_SHMPARAM_H +#define __ASM_GENERIC_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_GENERIC_SHMPARAM_H */ -- cgit v1.2.3-70-g09d2 From 6103ec56c65c33774c7c38652c8204120c3c7519 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:27 +0000 Subject: asm-generic: add generic ABI headers These header files are typically copied from an existing architecture into any new one, slightly modified and then remain untouched until the end of time in the name of ABI stability. To make it easier for future architectures, provide a sane generic version here. In cases where multiple architectures already use identical code, I used the most common version. In cases like stat.h that are more or less broken everywhere, I provide a version that is meant to be ideal for new architectures. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 15 +++ include/asm-generic/auxvec.h | 8 ++ include/asm-generic/ioctls.h | 110 +++++++++++++++++++++ include/asm-generic/mman.h | 18 ++++ include/asm-generic/param.h | 24 +++++ include/asm-generic/posix_types.h | 165 +++++++++++++++++++++++++++++++ include/asm-generic/setup.h | 6 ++ include/asm-generic/signal.h | 131 +++++++++++++++++++++++++ include/asm-generic/socket.h | 63 ++++++++++++ include/asm-generic/sockios.h | 13 +++ include/asm-generic/stat.h | 72 ++++++++++++++ include/asm-generic/swab.h | 18 ++++ include/asm-generic/termbits.h | 198 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/termios.h | 154 +++++++++++++++++++++++++++++ include/asm-generic/types.h | 42 ++++++++ include/asm-generic/ucontext.h | 12 +++ 16 files changed, 1049 insertions(+) create mode 100644 include/asm-generic/auxvec.h create mode 100644 include/asm-generic/ioctls.h create mode 100644 include/asm-generic/mman.h create mode 100644 include/asm-generic/param.h create mode 100644 include/asm-generic/posix_types.h create mode 100644 include/asm-generic/setup.h create mode 100644 include/asm-generic/signal.h create mode 100644 include/asm-generic/socket.h create mode 100644 include/asm-generic/sockios.h create mode 100644 include/asm-generic/stat.h create mode 100644 include/asm-generic/swab.h create mode 100644 include/asm-generic/termbits.h create mode 100644 include/asm-generic/termios.h create mode 100644 include/asm-generic/types.h create mode 100644 include/asm-generic/ucontext.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index fd7a9c49df4..11a78b8e2fc 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,17 +1,32 @@ +header-y += auxvec.h header-y += bitsperlong.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h header-y += ioctl.h +header-y += ioctls.h header-y += ipcbuf.h header-y += mman-common.h +header-y += mman.h header-y += msgbuf.h +header-y += param.h header-y += poll.h +header-y += posix_types.h header-y += sembuf.h +header-y += setup.h header-y += shmbuf.h header-y += shmparam.h header-y += signal-defs.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += ucontext.h unifdef-y += int-l64.h unifdef-y += int-ll64.h diff --git a/include/asm-generic/auxvec.h b/include/asm-generic/auxvec.h new file mode 100644 index 00000000000..b99573b0ad1 --- /dev/null +++ b/include/asm-generic/auxvec.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_AUXVEC_H +#define __ASM_GENERIC_AUXVEC_H +/* + * Not all architectures need their own auxvec.h, the most + * common definitions are already in linux/auxvec.h. + */ + +#endif /* __ASM_GENERIC_AUXVEC_H */ diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h new file mode 100644 index 00000000000..a799e20a769 --- /dev/null +++ b/include/asm-generic/ioctls.h @@ -0,0 +1,110 @@ +#ifndef __ASM_GENERIC_IOCTLS_H +#define __ASM_GENERIC_IOCTLS_H + +#include + +/* + * These are the most common definitions for tty ioctl numbers. + * Most of them do not use the recommended _IOC(), but there is + * probably some source code out there hardcoding the number, + * so we might as well use them for all new platforms. + * + * The architectures that use different values here typically + * try to be compatible with some Unix variants for the same + * architecture. + */ + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T', 0x2A, struct termios2) +#define TCSETS2 _IOW('T', 0x2B, struct termios2) +#define TCSETSW2 _IOW('T', 0x2C, struct termios2) +#define TCSETSF2 _IOW('T', 0x2D, struct termios2) +#define TIOCGRS485 0x542E +#define TIOCSRS485 0x542F +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ +#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ +#define TCSETX 0x5433 +#define TCSETXF 0x5434 +#define TCSETXW 0x5435 + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +/* + * some architectures define FIOQSIZE as 0x545E, which is used for + * TIOCGHAYESESP on others + */ +#ifndef FIOQSIZE +# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +# define FIOQSIZE 0x5460 +#endif + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* __ASM_GENERIC_IOCTLS_H */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h new file mode 100644 index 00000000000..7cab4de2bca --- /dev/null +++ b/include/asm-generic/mman.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H + +#include + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h new file mode 100644 index 00000000000..cdf8251bfb6 --- /dev/null +++ b/include/asm-generic/param.h @@ -0,0 +1,24 @@ +#ifndef __ASM_GENERIC_PARAM_H +#define __ASM_GENERIC_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ +# define USER_HZ 100 /* some user interfaces are */ +# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ +#endif + +#ifndef HZ +#define HZ 100 +#endif + +#ifndef EXEC_PAGESIZE +#define EXEC_PAGESIZE 4096 +#endif + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif /* __ASM_GENERIC_PARAM_H */ diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h new file mode 100644 index 00000000000..3dab00860e7 --- /dev/null +++ b/include/asm-generic/posix_types.h @@ -0,0 +1,165 @@ +#ifndef __ASM_GENERIC_POSIX_TYPES_H +#define __ASM_GENERIC_POSIX_TYPES_H + +#include +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. + * + * First the types that are often defined in different ways across + * architectures, so that you can override them. + */ + +#ifndef __kernel_ino_t +typedef unsigned long __kernel_ino_t; +#endif + +#ifndef __kernel_mode_t +typedef unsigned int __kernel_mode_t; +#endif + +#ifndef __kernel_nlink_t +typedef unsigned long __kernel_nlink_t; +#endif + +#ifndef __kernel_pid_t +typedef int __kernel_pid_t; +#endif + +#ifndef __kernel_ipc_pid_t +typedef int __kernel_ipc_pid_t; +#endif + +#ifndef __kernel_uid_t +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +#endif + +#ifndef __kernel_suseconds_t +typedef long __kernel_suseconds_t; +#endif + +#ifndef __kernel_daddr_t +typedef int __kernel_daddr_t; +#endif + +#ifndef __kernel_uid32_t +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; +#endif + +#ifndef __kernel_old_uid_t +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; +#endif + +#ifndef __kernel_old_dev_t +typedef unsigned int __kernel_old_dev_t; +#endif + +/* + * Most 32 bit architectures use "unsigned int" size_t, + * and all 64 bit architectures use "unsigned long" size_t. + */ +#ifndef __kernel_size_t +#if __BITS_PER_LONG != 64 +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +#else +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +#endif +#endif + +/* + * anything below here should be completely generic + */ +typedef long __kernel_off_t; +typedef long long __kernel_loff_t; +typedef long __kernel_time_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +#ifdef __KERNEL__ + +#undef __FD_SET +static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + +#undef __FD_ISSET +static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static inline void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_GENERIC_POSIX_TYPES_H */ diff --git a/include/asm-generic/setup.h b/include/asm-generic/setup.h new file mode 100644 index 00000000000..6fc26a51003 --- /dev/null +++ b/include/asm-generic/setup.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_SETUP_H +#define __ASM_GENERIC_SETUP_H + +#define COMMAND_LINE_SIZE 512 + +#endif /* __ASM_GENERIC_SETUP_H */ diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h new file mode 100644 index 00000000000..555c0aee8a4 --- /dev/null +++ b/include/asm-generic/signal.h @@ -0,0 +1,131 @@ +#ifndef __ASM_GENERIC_SIGNAL_H +#define __ASM_GENERIC_SIGNAL_H + +#include + +#define _NSIG 64 +#define _NSIG_BPW __BITS_PER_LONG +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#ifndef SIGRTMAX +#define SIGRTMAX _NSIG +#endif + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +/* + * New architectures should not define the obsolete + * SA_RESTORER 0x04000000 + */ + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#ifndef __ASSEMBLY__ +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +/* not actually used, but required for linux/syscalls.h */ +typedef unsigned long old_sigset_t; + +#include + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; +#ifdef SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +#include +#undef __HAVE_ARCH_SIG_BITOPS + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_GENERIC_SIGNAL_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h new file mode 100644 index 00000000000..5d79e409241 --- /dev/null +++ b/include/asm-generic/socket.h @@ -0,0 +1,63 @@ +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H + +#include + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ + +#ifndef SO_PASSCRED /* powerpc only differs in these */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 +#endif + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/asm-generic/sockios.h b/include/asm-generic/sockios.h new file mode 100644 index 00000000000..9a61a369b90 --- /dev/null +++ b/include/asm-generic/sockios.h @@ -0,0 +1,13 @@ +#ifndef __ASM_GENERIC_SOCKIOS_H +#define __ASM_GENERIC_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/include/asm-generic/stat.h b/include/asm-generic/stat.h new file mode 100644 index 00000000000..47e64170305 --- /dev/null +++ b/include/asm-generic/stat.h @@ -0,0 +1,72 @@ +#ifndef __ASM_GENERIC_STAT_H +#define __ASM_GENERIC_STAT_H + +/* + * Everybody gets this wrong and has to stick with it for all + * eternity. Hopefully, this version gets used by new architectures + * so they don't fall into the same traps. + * + * stat64 is copied from powerpc64, with explicit padding added. + * stat is the same structure layout on 64-bit, without the 'long long' + * types. + * + * By convention, 64 bit architectures use the stat interface, while + * 32 bit architectures use the stat64 interface. Note that we don't + * provide an __old_kernel_stat here, which new architecture should + * not have to start with. + */ + +#include + +#define STAT_HAVE_NSEC 1 + +struct stat { + unsigned long st_dev; /* Device. */ + unsigned long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long st_rdev; /* Device number, if device. */ + unsigned long __pad1; + long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int st_atime_nsec; + int st_mtime; /* Time of last modification. */ + unsigned int st_mtime_nsec; + int st_ctime; /* Time of last status change. */ + unsigned int st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; +}; + +#if __BITS_PER_LONG != 64 +/* This matches struct stat64 in glibc2.1. Only used for 32 bit. */ +struct stat64 { + unsigned long long st_dev; /* Device. */ + unsigned long long st_ino; /* File serial number. */ + unsigned int st_mode; /* File mode. */ + unsigned int st_nlink; /* Link count. */ + unsigned int st_uid; /* User ID of the file's owner. */ + unsigned int st_gid; /* Group ID of the file's group. */ + unsigned long long st_rdev; /* Device number, if device. */ + unsigned long long __pad1; + long long st_size; /* Size of file, in bytes. */ + int st_blksize; /* Optimal block size for I/O. */ + int __pad2; + long long st_blocks; /* Number 512-byte blocks allocated. */ + int st_atime; /* Time of last access. */ + unsigned int st_atime_nsec; + int st_mtime; /* Time of last modification. */ + unsigned int st_mtime_nsec; + int st_ctime; /* Time of last status change. */ + unsigned int st_ctime_nsec; + unsigned int __unused4; + unsigned int __unused5; +}; +#endif + +#endif /* __ASM_GENERIC_STAT_H */ diff --git a/include/asm-generic/swab.h b/include/asm-generic/swab.h new file mode 100644 index 00000000000..a8e9029d9eb --- /dev/null +++ b/include/asm-generic/swab.h @@ -0,0 +1,18 @@ +#ifndef _ASM_GENERIC_SWAB_H +#define _ASM_GENERIC_SWAB_H + +#include + +/* + * 32 bit architectures typically (but not always) want to + * set __SWAB_64_THRU_32__. In user space, this is only + * valid if the compiler supports 64 bit data types. + */ + +#if __BITS_PER_LONG == 32 +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) +#define __SWAB_64_THRU_32__ +#endif +#endif + +#endif /* _ASM_GENERIC_SWAB_H */ diff --git a/include/asm-generic/termbits.h b/include/asm-generic/termbits.h new file mode 100644 index 00000000000..1c9773d48cb --- /dev/null +++ b/include/asm-generic/termbits.h @@ -0,0 +1,198 @@ +#ifndef __ASM_GENERIC_TERMBITS_H +#define __ASM_GENERIC_TERMBITS_H + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define BOTHER 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* __ASM_GENERIC_TERMBITS_H */ diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h new file mode 100644 index 00000000000..d0922adc56d --- /dev/null +++ b/include/asm-generic/termios.h @@ -0,0 +1,154 @@ +#ifndef _ASM_GENERIC_TERMIOS_H +#define _ASM_GENERIC_TERMIOS_H +/* + * Most architectures have straight copies of the x86 code, with + * varying levels of bug fixes on top. Usually it's a good idea + * to use this generic version instead, but be careful to avoid + * ABI changes. + * New architectures should not provide their own version. + */ + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +#ifdef __KERNEL__ + +#include + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +static inline int user_termio_to_kernel_termios(struct ktermios *termios, + const struct termio __user *termio) +{ + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; +} + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +static inline int kernel_termios_to_user_termio(struct termio __user *termio, + struct ktermios *termios) +{ + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; +} + +#ifdef TCGETS2 +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios2 __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios2)); +} + +static inline int kernel_termios_to_user_termios(struct termios2 __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios2)); +} + +static inline int user_termios_to_kernel_termios_1(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios_1(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#else /* TCGETS2 */ +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#endif /* TCGETS2 */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_GENERIC_TERMIOS_H */ diff --git a/include/asm-generic/types.h b/include/asm-generic/types.h new file mode 100644 index 00000000000..fba7d33ca3f --- /dev/null +++ b/include/asm-generic/types.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_TYPES_H +#define _ASM_GENERIC_TYPES_H +/* + * int-ll64 is used practically everywhere now, + * so use it as a reasonable default. + */ +#include + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +/* + * DMA addresses may be very different from physical addresses + * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit + * systems, while sparc64 uses 32 bit DMA addresses for 64 bit + * physical addresses. + * This default defines dma_addr_t to have the same size as + * phys_addr_t, which is the most common way. + * Do not define the dma64_addr_t type, which never really + * worked. + */ +#ifndef dma_addr_t +#ifdef CONFIG_PHYS_ADDR_T_64BIT +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif /* CONFIG_PHYS_ADDR_T_64BIT */ +#endif /* dma_addr_t */ + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/include/asm-generic/ucontext.h b/include/asm-generic/ucontext.h new file mode 100644 index 00000000000..ad77343e8a9 --- /dev/null +++ b/include/asm-generic/ucontext.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_UCONTEXT_H +#define __ASM_GENERIC_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* __ASM_GENERIC_UCONTEXT_H */ -- cgit v1.2.3-70-g09d2 From e64a1617eca39d62b248a11699de9c1195369661 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:28 +0000 Subject: asm-generic: add a generic unistd.h A new architecture should only define a minimal set of system calls while still providing the full functionality. This version of unistd.h has gone through intensive review to make sure that by default it only enables syscalls that do not already have a more featureful replacement. It is modeled after the x86-64 version of unistd.h, which unifies the syscall number definition and the actual system call table in a single file, in order to keep them synchronized much more easily. This first version still keeps legacy system call definitions around, guarded by various #ifdefs, and with numbers larger than 1024. The idea behind this is to make it easier for new architectures to transition from a full list to the reduced set. In particular, the new microblaze architecture that should migrate to using the generic ABI headers can at least use an existing uClibc source tree without major rewrites during the conversion. Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 1 + include/asm-generic/unistd.h | 854 +++++++++++++++++++++++++++++++++++++++++++ scripts/checksyscalls.sh | 92 ++++- 3 files changed, 944 insertions(+), 3 deletions(-) create mode 100644 include/asm-generic/unistd.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 11a78b8e2fc..eb62334cda2 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -27,6 +27,7 @@ header-y += termbits.h header-y += termios.h header-y += types.h header-y += ucontext.h +header-y += unistd.h unifdef-y += int-l64.h unifdef-y += int-ll64.h diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h new file mode 100644 index 00000000000..5b34b6233d6 --- /dev/null +++ b/include/asm-generic/unistd.h @@ -0,0 +1,854 @@ +#if !defined(_ASM_GENERIC_UNISTD_H) || defined(__SYSCALL) +#define _ASM_GENERIC_UNISTD_H + +#include + +/* + * This file contains the system call numbers, based on the + * layout of the x86-64 architecture, which embeds the + * pointer to the syscall in the table. + * + * As a basic principle, no duplication of functionality + * should be added, e.g. we don't use lseek when llseek + * is present. New architectures should use this file + * and implement the less feature-full calls in user space. + */ + +#ifndef __SYSCALL +#define __SYSCALL(x, y) +#endif + +#if __BITS_PER_LONG == 32 +#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32) +#else +#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64) +#endif + +#define __NR_io_setup 0 +__SYSCALL(__NR_io_setup, sys_io_setup) +#define __NR_io_destroy 1 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_submit 2 +__SYSCALL(__NR_io_submit, sys_io_submit) +#define __NR_io_cancel 3 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_io_getevents 4 +__SYSCALL(__NR_io_getevents, sys_io_getevents) + +/* fs/xattr.c */ +#define __NR_setxattr 5 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 6 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 7 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 8 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 9 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 10 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 11 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 12 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 13 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 14 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 15 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 16 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) + +/* fs/dcache.c */ +#define __NR_getcwd 17 +__SYSCALL(__NR_getcwd, sys_getcwd) + +/* fs/cookies.c */ +#define __NR_lookup_dcookie 18 +__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) + +/* fs/eventfd.c */ +#define __NR_eventfd2 19 +__SYSCALL(__NR_eventfd2, sys_eventfd2) + +/* fs/eventpoll.c */ +#define __NR_epoll_create1 20 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_epoll_ctl 21 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_pwait 22 +__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) + +/* fs/fcntl.c */ +#define __NR_dup 23 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_dup3 24 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR3264_fcntl 25 +__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl) + +/* fs/inotify_user.c */ +#define __NR_inotify_init1 26 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_inotify_add_watch 27 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 28 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) + +/* fs/ioctl.c */ +#define __NR_ioctl 29 +__SYSCALL(__NR_ioctl, sys_ioctl) + +/* fs/ioprio.c */ +#define __NR_ioprio_set 30 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 31 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) + +/* fs/locks.c */ +#define __NR_flock 32 +__SYSCALL(__NR_flock, sys_flock) + +/* fs/namei.c */ +#define __NR_mknodat 33 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_mkdirat 34 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_unlinkat 35 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_symlinkat 36 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_linkat 37 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_renameat 38 +__SYSCALL(__NR_renameat, sys_renameat) + +/* fs/namespace.c */ +#define __NR_umount2 39 +__SYSCALL(__NR_umount2, sys_umount) +#define __NR_mount 40 +__SYSCALL(__NR_mount, sys_mount) +#define __NR_pivot_root 41 +__SYSCALL(__NR_pivot_root, sys_pivot_root) + +/* fs/nfsctl.c */ +#define __NR_nfsservctl 42 +__SYSCALL(__NR_nfsservctl, sys_nfsservctl) + +/* fs/open.c */ +#define __NR3264_statfs 43 +__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs) +#define __NR3264_fstatfs 44 +__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs) +#define __NR3264_truncate 45 +__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate) +#define __NR3264_ftruncate 46 +__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate) + +#define __NR_fallocate 47 +__SYSCALL(__NR_fallocate, sys_fallocate) +#define __NR_faccessat 48 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_chdir 49 +__SYSCALL(__NR_chdir, sys_chdir) +#define __NR_fchdir 50 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_chroot 51 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_fchmod 52 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchmodat 53 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_fchownat 54 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_fchown 55 +__SYSCALL(__NR_fchown, sys_fchown) +#define __NR_openat 56 +__SYSCALL(__NR_openat, sys_openat) +#define __NR_close 57 +__SYSCALL(__NR_close, sys_close) +#define __NR_vhangup 58 +__SYSCALL(__NR_vhangup, sys_vhangup) + +/* fs/pipe.c */ +#define __NR_pipe2 59 +__SYSCALL(__NR_pipe2, sys_pipe2) + +/* fs/quota.c */ +#define __NR_quotactl 60 +__SYSCALL(__NR_quotactl, sys_quotactl) + +/* fs/readdir.c */ +#define __NR_getdents64 61 +__SYSCALL(__NR_getdents64, sys_getdents64) + +/* fs/read_write.c */ +#define __NR3264_lseek 62 +__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek) +#define __NR_read 63 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 64 +__SYSCALL(__NR_write, sys_write) +#define __NR_readv 65 +__SYSCALL(__NR_readv, sys_readv) +#define __NR_writev 66 +__SYSCALL(__NR_writev, sys_writev) +#define __NR_pread64 67 +__SYSCALL(__NR_pread64, sys_pread64) +#define __NR_pwrite64 68 +__SYSCALL(__NR_pwrite64, sys_pwrite64) +#define __NR_preadv 69 +__SYSCALL(__NR_preadv, sys_preadv) +#define __NR_pwritev 70 +__SYSCALL(__NR_pwritev, sys_pwritev) + +/* fs/sendfile.c */ +#define __NR3264_sendfile 71 +__SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile) + +/* fs/select.c */ +#define __NR_pselect6 72 +__SYSCALL(__NR_pselect6, sys_pselect6) +#define __NR_ppoll 73 +__SYSCALL(__NR_ppoll, sys_ppoll) + +/* fs/signalfd.c */ +#define __NR_signalfd4 74 +__SYSCALL(__NR_signalfd4, sys_signalfd4) + +/* fs/splice.c */ +#define __NR_vmsplice 75 +__SYSCALL(__NR_vmsplice, sys_vmsplice) +#define __NR_splice 76 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_tee 77 +__SYSCALL(__NR_tee, sys_tee) + +/* fs/stat.c */ +#define __NR_readlinkat 78 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR3264_fstatat 79 +__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) +#define __NR3264_fstat 80 +__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) + +/* fs/sync.c */ +#define __NR_sync 81 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_fsync 82 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_fdatasync 83 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR_sync_file_range 84 +__SYSCALL(__NR_sync_file_range, sys_sync_file_range) /* .long sys_sync_file_range2, */ + +/* fs/timerfd.c */ +#define __NR_timerfd_create 85 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_timerfd_settime 86 +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +#define __NR_timerfd_gettime 87 +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) + +/* fs/utimes.c */ +#define __NR_utimensat 88 +__SYSCALL(__NR_utimensat, sys_utimensat) + +/* kernel/acct.c */ +#define __NR_acct 89 +__SYSCALL(__NR_acct, sys_acct) + +/* kernel/capability.c */ +#define __NR_capget 90 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 91 +__SYSCALL(__NR_capset, sys_capset) + +/* kernel/exec_domain.c */ +#define __NR_personality 92 +__SYSCALL(__NR_personality, sys_personality) + +/* kernel/exit.c */ +#define __NR_exit 93 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_exit_group 94 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_waitid 95 +__SYSCALL(__NR_waitid, sys_waitid) + +/* kernel/fork.c */ +#define __NR_set_tid_address 96 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_unshare 97 +__SYSCALL(__NR_unshare, sys_unshare) + +/* kernel/futex.c */ +#define __NR_futex 98 +__SYSCALL(__NR_futex, sys_futex) +#define __NR_set_robust_list 99 +__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +#define __NR_get_robust_list 100 +__SYSCALL(__NR_get_robust_list, sys_get_robust_list) + +/* kernel/hrtimer.c */ +#define __NR_nanosleep 101 +__SYSCALL(__NR_nanosleep, sys_nanosleep) + +/* kernel/itimer.c */ +#define __NR_getitimer 102 +__SYSCALL(__NR_getitimer, sys_getitimer) +#define __NR_setitimer 103 +__SYSCALL(__NR_setitimer, sys_setitimer) + +/* kernel/kexec.c */ +#define __NR_kexec_load 104 +__SYSCALL(__NR_kexec_load, sys_kexec_load) + +/* kernel/module.c */ +#define __NR_init_module 105 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 106 +__SYSCALL(__NR_delete_module, sys_delete_module) + +/* kernel/posix-timers.c */ +#define __NR_timer_create 107 +__SYSCALL(__NR_timer_create, sys_timer_create) +#define __NR_timer_gettime 108 +__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +#define __NR_timer_getoverrun 109 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_settime 110 +__SYSCALL(__NR_timer_settime, sys_timer_settime) +#define __NR_timer_delete 111 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 112 +__SYSCALL(__NR_clock_settime, sys_clock_settime) +#define __NR_clock_gettime 113 +__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +#define __NR_clock_getres 114 +__SYSCALL(__NR_clock_getres, sys_clock_getres) +#define __NR_clock_nanosleep 115 +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) + +/* kernel/printk.c */ +#define __NR_syslog 116 +__SYSCALL(__NR_syslog, sys_syslog) + +/* kernel/ptrace.c */ +#define __NR_ptrace 117 +__SYSCALL(__NR_ptrace, sys_ptrace) + +/* kernel/sched.c */ +#define __NR_sched_setparam 118 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_setscheduler 119 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 120 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_getparam 121 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setaffinity 122 +__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +#define __NR_sched_getaffinity 123 +__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +#define __NR_sched_yield 124 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 125 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 126 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 127 +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) + +/* kernel/signal.c */ +#define __NR_restart_syscall 128 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_kill 129 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_tkill 130 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_tgkill 131 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_sigaltstack 132 +__SYSCALL(__NR_sigaltstack, sys_sigaltstack) +#define __NR_rt_sigsuspend 133 +__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +#define __NR_rt_sigaction 134 +__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */ +#define __NR_rt_sigprocmask 135 +__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) +#define __NR_rt_sigpending 136 +__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) +#define __NR_rt_sigtimedwait 137 +__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 138 +__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +#define __NR_rt_sigreturn 139 +__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */ + +/* kernel/sys.c */ +#define __NR_setpriority 140 +__SYSCALL(__NR_setpriority, sys_setpriority) +#define __NR_getpriority 141 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_reboot 142 +__SYSCALL(__NR_reboot, sys_reboot) +#define __NR_setregid 143 +__SYSCALL(__NR_setregid, sys_setregid) +#define __NR_setgid 144 +__SYSCALL(__NR_setgid, sys_setgid) +#define __NR_setreuid 145 +__SYSCALL(__NR_setreuid, sys_setreuid) +#define __NR_setuid 146 +__SYSCALL(__NR_setuid, sys_setuid) +#define __NR_setresuid 147 +__SYSCALL(__NR_setresuid, sys_setresuid) +#define __NR_getresuid 148 +__SYSCALL(__NR_getresuid, sys_getresuid) +#define __NR_setresgid 149 +__SYSCALL(__NR_setresgid, sys_setresgid) +#define __NR_getresgid 150 +__SYSCALL(__NR_getresgid, sys_getresgid) +#define __NR_setfsuid 151 +__SYSCALL(__NR_setfsuid, sys_setfsuid) +#define __NR_setfsgid 152 +__SYSCALL(__NR_setfsgid, sys_setfsgid) +#define __NR_times 153 +__SYSCALL(__NR_times, sys_times) +#define __NR_setpgid 154 +__SYSCALL(__NR_setpgid, sys_setpgid) +#define __NR_getpgid 155 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_getsid 156 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_setsid 157 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_getgroups 158 +__SYSCALL(__NR_getgroups, sys_getgroups) +#define __NR_setgroups 159 +__SYSCALL(__NR_setgroups, sys_setgroups) +#define __NR_uname 160 +__SYSCALL(__NR_uname, sys_newuname) +#define __NR_sethostname 161 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setdomainname 162 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_getrlimit 163 +__SYSCALL(__NR_getrlimit, sys_getrlimit) +#define __NR_setrlimit 164 +__SYSCALL(__NR_setrlimit, sys_setrlimit) +#define __NR_getrusage 165 +__SYSCALL(__NR_getrusage, sys_getrusage) +#define __NR_umask 166 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_prctl 167 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_getcpu 168 +__SYSCALL(__NR_getcpu, sys_getcpu) + +/* kernel/time.c */ +#define __NR_gettimeofday 169 +__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +#define __NR_settimeofday 170 +__SYSCALL(__NR_settimeofday, sys_settimeofday) +#define __NR_adjtimex 171 +__SYSCALL(__NR_adjtimex, sys_adjtimex) + +/* kernel/timer.c */ +#define __NR_getpid 172 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_getppid 173 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getuid 174 +__SYSCALL(__NR_getuid, sys_getuid) +#define __NR_geteuid 175 +__SYSCALL(__NR_geteuid, sys_geteuid) +#define __NR_getgid 176 +__SYSCALL(__NR_getgid, sys_getgid) +#define __NR_getegid 177 +__SYSCALL(__NR_getegid, sys_getegid) +#define __NR_gettid 178 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_sysinfo 179 +__SYSCALL(__NR_sysinfo, sys_sysinfo) + +/* ipc/mqueue.c */ +#define __NR_mq_open 180 +__SYSCALL(__NR_mq_open, sys_mq_open) +#define __NR_mq_unlink 181 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 182 +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +#define __NR_mq_timedreceive 183 +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +#define __NR_mq_notify 184 +__SYSCALL(__NR_mq_notify, sys_mq_notify) +#define __NR_mq_getsetattr 185 +__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) + +/* ipc/msg.c */ +#define __NR_msgget 186 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 187 +__SYSCALL(__NR_msgctl, sys_msgctl) +#define __NR_msgrcv 188 +__SYSCALL(__NR_msgrcv, sys_msgrcv) +#define __NR_msgsnd 189 +__SYSCALL(__NR_msgsnd, sys_msgsnd) + +/* ipc/sem.c */ +#define __NR_semget 190 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semctl 191 +__SYSCALL(__NR_semctl, sys_semctl) +#define __NR_semtimedop 192 +__SYSCALL(__NR_semtimedop, sys_semtimedop) +#define __NR_semop 193 +__SYSCALL(__NR_semop, sys_semop) + +/* ipc/shm.c */ +#define __NR_shmget 194 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 195 +__SYSCALL(__NR_shmctl, sys_shmctl) +#define __NR_shmat 196 +__SYSCALL(__NR_shmat, sys_shmat) +#define __NR_shmdt 197 +__SYSCALL(__NR_shmdt, sys_shmdt) + +/* net/socket.c */ +#define __NR_socket 198 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_socketpair 199 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_bind 200 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_listen 201 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 202 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_connect 203 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_getsockname 204 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 205 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_sendto 206 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recvfrom 207 +__SYSCALL(__NR_recvfrom, sys_recvfrom) +#define __NR_setsockopt 208 +__SYSCALL(__NR_setsockopt, sys_setsockopt) +#define __NR_getsockopt 209 +__SYSCALL(__NR_getsockopt, sys_getsockopt) +#define __NR_shutdown 210 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_sendmsg 211 +__SYSCALL(__NR_sendmsg, sys_sendmsg) +#define __NR_recvmsg 212 +__SYSCALL(__NR_recvmsg, sys_recvmsg) + +/* mm/filemap.c */ +#define __NR_readahead 213 +__SYSCALL(__NR_readahead, sys_readahead) + +/* mm/nommu.c, also with MMU */ +#define __NR_brk 214 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_munmap 215 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_mremap 216 +__SYSCALL(__NR_mremap, sys_mremap) + +/* security/keys/keyctl.c */ +#define __NR_add_key 217 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 218 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 219 +__SYSCALL(__NR_keyctl, sys_keyctl) + +/* arch/example/kernel/sys_example.c */ +#define __NR_clone 220 +__SYSCALL(__NR_clone, sys_clone) /* .long sys_clone_wrapper */ +#define __NR_execve 221 +__SYSCALL(__NR_execve, sys_execve) /* .long sys_execve_wrapper */ + +#define __NR3264_mmap 222 +__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) +/* mm/fadvise.c */ +#define __NR3264_fadvise64 223 +__SC_3264(__NR3264_fadvise64, sys_fadvise64_64, sys_fadvise64) + +/* mm/, CONFIG_MMU only */ +#ifndef __ARCH_NOMMU +#define __NR_swapon 224 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_swapoff 225 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_mprotect 226 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_msync 227 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_mlock 228 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 229 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 230 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 231 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_mincore 232 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 233 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_remap_file_pages 234 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) +#define __NR_mbind 235 +__SYSCALL(__NR_mbind, sys_mbind) +#define __NR_get_mempolicy 236 +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +#define __NR_set_mempolicy 237 +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +#define __NR_migrate_pages 238 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) +#define __NR_move_pages 239 +__SYSCALL(__NR_move_pages, sys_move_pages) +#endif + +#undef __NR_syscalls +#define __NR_syscalls 240 + +/* + * All syscalls below here should go away really, + * these are provided for both review and as a porting + * help for the C library version. +* + * Last chance: are any of these important enought to + * enable by default? + */ +#ifdef __ARCH_WANT_SYSCALL_NO_AT +#define __NR_open 1024 +__SYSCALL(__NR_open, sys_open) +#define __NR_link 1025 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 1026 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_mknod 1027 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 1028 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_chown 1029 +__SYSCALL(__NR_chown, sys_chown) +#define __NR_mkdir 1030 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 1031 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_lchown 1032 +__SYSCALL(__NR_lchown, sys_lchown) +#define __NR_access 1033 +__SYSCALL(__NR_access, sys_access) +#define __NR_rename 1034 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_readlink 1035 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_symlink 1036 +__SYSCALL(__NR_symlink, sys_symlink) +#define __NR_utimes 1037 +__SYSCALL(__NR_utimes, sys_utimes) +#define __NR3264_stat 1038 +__SC_3264(__NR3264_stat, sys_stat64, sys_newstat) +#define __NR3264_lstat 1039 +__SC_3264(__NR3264_lstat, sys_lstat64, sys_newlstat) + +#undef __NR_syscalls +#define __NR_syscalls (__NR3264_lstat+1) +#endif /* __ARCH_WANT_SYSCALL_NO_AT */ + +#ifdef __ARCH_WANT_SYSCALL_NO_FLAGS +#define __NR_pipe 1040 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_dup2 1041 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_epoll_create 1042 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_inotify_init 1043 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_eventfd 1044 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_signalfd 1045 +__SYSCALL(__NR_signalfd, sys_signalfd) + +#undef __NR_syscalls +#define __NR_syscalls (__NR_signalfd+1) +#endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */ + +#if __BITS_PER_LONG == 32 && defined(__ARCH_WANT_SYSCALL_OFF_T) +#define __NR_sendfile 1046 +__SYSCALL(__NR_sendfile, sys_sendfile) +#define __NR_ftruncate 1047 +__SYSCALL(__NR_ftruncate, sys_ftruncate) +#define __NR_truncate 1048 +__SYSCALL(__NR_truncate, sys_truncate) +#define __NR_stat 1049 +__SYSCALL(__NR_stat, sys_newstat) +#define __NR_lstat 1050 +__SYSCALL(__NR_lstat, sys_newlstat) +#define __NR_fstat 1051 +__SYSCALL(__NR_fstat, sys_newfstat) +#define __NR_fcntl 1052 +__SYSCALL(__NR_fcntl, sys_fcntl) +#define __NR_fadvise64 1053 +#define __ARCH_WANT_SYS_FADVISE64 +__SYSCALL(__NR_fadvise64, sys_fadvise64) +#define __NR_newfstatat 1054 +#define __ARCH_WANT_SYS_NEWFSTATAT +__SYSCALL(__NR_newfstatat, sys_newfstatat) +#define __NR_fstatfs 1055 +__SYSCALL(__NR_fstatfs, sys_fstatfs) +#define __NR_statfs 1056 +__SYSCALL(__NR_statfs, sys_statfs) +#define __NR_lseek 1057 +__SYSCALL(__NR_lseek, sys_lseek) +#define __NR_mmap 1058 +__SYSCALL(__NR_mmap, sys_mmap) + +#undef __NR_syscalls +#define __NR_syscalls (__NR_mmap+1) +#endif /* 32 bit off_t syscalls */ + +#ifdef __ARCH_WANT_SYSCALL_DEPRECATED +#define __NR_alarm 1059 +#define __ARCH_WANT_SYS_ALARM +__SYSCALL(__NR_alarm, sys_alarm) +#define __NR_getpgrp 1060 +#define __ARCH_WANT_SYS_GETPGRP +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_pause 1061 +#define __ARCH_WANT_SYS_PAUSE +__SYSCALL(__NR_pause, sys_pause) +#define __NR_time 1062 +#define __ARCH_WANT_SYS_TIME +__SYSCALL(__NR_time, sys_time) +#define __NR_utime 1063 +#define __ARCH_WANT_SYS_UTIME +__SYSCALL(__NR_utime, sys_utime) + +#define __NR_creat 1064 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_getdents 1065 +#define __ARCH_WANT_SYS_GETDENTS +__SYSCALL(__NR_getdents, sys_getdents) +#define __NR_futimesat 1066 +__SYSCALL(__NR_futimesat, sys_futimesat) +#define __NR_select 1067 +#define __ARCH_WANT_SYS_SELECT +__SYSCALL(__NR_select, sys_select) +#define __NR_poll 1068 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_epoll_wait 1069 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_ustat 1070 +__SYSCALL(__NR_ustat, sys_ustat) +#define __NR_vfork 1071 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_wait4 1072 +__SYSCALL(__NR_wait4, sys_wait4) +#define __NR_recv 1073 +__SYSCALL(__NR_recv, sys_recv) +#define __NR_send 1074 +__SYSCALL(__NR_send, sys_send) +#define __NR_bdflush 1075 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_umount 1076 +__SYSCALL(__NR_umount, sys_oldumount) +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __NR_uselib 1077 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR__sysctl 1078 +__SYSCALL(__NR__sysctl, sys_sysctl) + +#define __NR_fork 1079 +#ifdef CONFIG_MMU +__SYSCALL(__NR_fork, sys_fork) +#else +__SYSCALL(__NR_fork, sys_ni_syscall) +#endif /* CONFIG_MMU */ + +#undef __NR_syscalls +#define __NR_syscalls (__NR_fork+1) + +#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */ + +/* + * 32 bit systems traditionally used different + * syscalls for off_t and loff_t arguments, while + * 64 bit systems only need the off_t version. + * For new 32 bit platforms, there is no need to + * implement the old 32 bit off_t syscalls, so + * they take different names. + * Here we map the numbers so that both versions + * use the same syscall table layout. + */ +#if __BITS_PER_LONG == 64 +#define __NR_fcntl __NR3264_fcntl +#define __NR_statfs __NR3264_statfs +#define __NR_fstatfs __NR3264_fstatfs +#define __NR_truncate __NR3264_truncate +#define __NR_ftruncate __NR3264_truncate +#define __NR_lseek __NR3264_lseek +#define __NR_sendfile __NR3264_sendfile +#define __NR_newfstatat __NR3264_fstatat +#define __NR_fstat __NR3264_fstat +#define __NR_mmap __NR3264_mmap +#define __NR_fadvise64 __NR3264_fadvise64 +#ifdef __NR3264_stat +#define __NR_stat __NR3264_stat +#define __NR_lstat __NR3264_lstat +#endif +#else +#define __NR_fcntl64 __NR3264_fcntl +#define __NR_statfs64 __NR3264_statfs +#define __NR_fstatfs64 __NR3264_fstatfs +#define __NR_truncate64 __NR3264_truncate +#define __NR_ftruncate64 __NR3264_truncate +#define __NR_llseek __NR3264_lseek +#define __NR_sendfile64 __NR3264_sendfile +#define __NR_fstatat64 __NR3264_fstatat +#define __NR_fstat64 __NR3264_fstat +#define __NR_mmap2 __NR3264_mmap +#define __NR_fadvise64_64 __NR3264_fadvise64 +#ifdef __NR3264_stat +#define __NR_stat64 __NR3264_stat +#define __NR_lstat64 __NR3264_lstat +#endif +#endif + +#ifdef __KERNEL__ + +/* + * These are required system calls, we should + * invert the logic eventually and let them + * be selected by default. + */ +#if __BITS_PER_LONG == 32 +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_LLSEEK +#endif +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#ifndef cond_syscall +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") +#endif + +#endif /* __KERNEL__ */ +#endif /* _ASM_GENERIC_UNISTD_H */ diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index 60d00d1c4ee..66ad375612f 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -14,6 +14,57 @@ cat << EOF #include #include +/* *at */ +#define __IGNORE_open /* openat */ +#define __IGNORE_link /* linkat */ +#define __IGNORE_unlink /* unlinkat */ +#define __IGNORE_mknod /* mknodat */ +#define __IGNORE_chmod /* fchmodat */ +#define __IGNORE_chown /* fchownat */ +#define __IGNORE_mkdir /* mkdirat */ +#define __IGNORE_rmdir /* unlinkat */ +#define __IGNORE_lchown /* fchownat */ +#define __IGNORE_access /* faccessat */ +#define __IGNORE_rename /* renameat */ +#define __IGNORE_readlink /* readlinkat */ +#define __IGNORE_symlink /* symlinkat */ +#define __IGNORE_utimes /* futimesat */ +#if BITS_PER_LONG == 64 +#define __IGNORE_stat /* fstatat */ +#define __IGNORE_lstat /* fstatat */ +#else +#define __IGNORE_stat64 /* fstatat64 */ +#define __IGNORE_lstat64 /* fstatat64 */ +#endif + +/* CLOEXEC flag */ +#define __IGNORE_pipe /* pipe2 */ +#define __IGNORE_dup2 /* dup3 */ +#define __IGNORE_epoll_create /* epoll_create1 */ +#define __IGNORE_inotify_init /* inotify_init1 */ +#define __IGNORE_eventfd /* eventfd2 */ +#define __IGNORE_signalfd /* signalfd4 */ + +/* MMU */ +#ifndef CONFIG_MMU +#define __IGNORE_madvise +#define __IGNORE_mbind +#define __IGNORE_mincore +#define __IGNORE_mlock +#define __IGNORE_mlockall +#define __IGNORE_munlock +#define __IGNORE_munlockall +#define __IGNORE_mprotect +#define __IGNORE_msync +#define __IGNORE_migrate_pages +#define __IGNORE_move_pages +#define __IGNORE_remap_file_pages +#define __IGNORE_get_mempolicy +#define __IGNORE_set_mempolicy +#define __IGNORE_swapoff +#define __IGNORE_swapon +#endif + /* System calls for 32-bit kernels only */ #if BITS_PER_LONG == 64 #define __IGNORE_sendfile64 @@ -27,6 +78,22 @@ cat << EOF #define __IGNORE_fstatat64 #define __IGNORE_fstatfs64 #define __IGNORE_statfs64 +#define __IGNORE_llseek +#define __IGNORE_mmap2 +#else +#define __IGNORE_sendfile +#define __IGNORE_ftruncate +#define __IGNORE_truncate +#define __IGNORE_stat +#define __IGNORE_lstat +#define __IGNORE_fstat +#define __IGNORE_fcntl +#define __IGNORE_fadvise64 +#define __IGNORE_newfstatat +#define __IGNORE_fstatfs +#define __IGNORE_statfs +#define __IGNORE_lseek +#define __IGNORE_mmap #endif /* i386-specific or historical system calls */ @@ -44,7 +111,6 @@ cat << EOF #define __IGNORE_idle #define __IGNORE_modify_ldt #define __IGNORE_ugetrlimit -#define __IGNORE_mmap2 #define __IGNORE_vm86 #define __IGNORE_vm86old #define __IGNORE_set_thread_area @@ -55,7 +121,6 @@ cat << EOF #define __IGNORE_oldlstat #define __IGNORE_oldolduname #define __IGNORE_olduname -#define __IGNORE_umount2 #define __IGNORE_umount #define __IGNORE_waitpid #define __IGNORE_stime @@ -75,9 +140,12 @@ cat << EOF #define __IGNORE__llseek #define __IGNORE__newselect #define __IGNORE_create_module -#define __IGNORE_delete_module #define __IGNORE_query_module #define __IGNORE_get_kernel_syms +#define __IGNORE_sysfs +#define __IGNORE_uselib +#define __IGNORE__sysctl + /* ... including the "new" 32-bit uid syscalls */ #define __IGNORE_lchown32 #define __IGNORE_getuid32 @@ -99,6 +167,24 @@ cat << EOF #define __IGNORE_setfsuid32 #define __IGNORE_setfsgid32 +/* these can be expressed using other calls */ +#define __IGNORE_alarm /* setitimer */ +#define __IGNORE_creat /* open */ +#define __IGNORE_fork /* clone */ +#define __IGNORE_futimesat /* utimensat */ +#define __IGNORE_getpgrp /* getpgid */ +#define __IGNORE_getdents /* getdents64 */ +#define __IGNORE_pause /* sigsuspend */ +#define __IGNORE_poll /* ppoll */ +#define __IGNORE_select /* pselect6 */ +#define __IGNORE_epoll_wait /* epoll_pwait */ +#define __IGNORE_time /* gettimeofday */ +#define __IGNORE_uname /* newuname */ +#define __IGNORE_ustat /* statfs */ +#define __IGNORE_utime /* utimes */ +#define __IGNORE_vfork /* clone */ +#define __IGNORE_wait4 /* waitid */ + /* sync_file_range had a stupid ABI. Allow sync_file_range2 instead */ #ifdef __NR_sync_file_range2 #define __IGNORE_sync_file_range -- cgit v1.2.3-70-g09d2 From 72099ed2719fc5829bd79c6ca9d1783ed026eb37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:29 +0000 Subject: asm-generic: rename atomic.h to atomic-long.h The existing asm-generic/atomic.h only defines the atomic_long type. This renames it to atomic-long.h so we have a place to add a truly generic atomic.h that can be used on all non-SMP systems. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann Acked-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 2 +- arch/arm/include/asm/atomic.h | 2 +- arch/avr32/include/asm/atomic.h | 2 +- arch/blackfin/include/asm/atomic.h | 2 +- arch/cris/include/asm/atomic.h | 2 +- arch/frv/include/asm/atomic.h | 2 +- arch/h8300/include/asm/atomic.h | 2 +- arch/ia64/include/asm/atomic.h | 2 +- arch/m32r/include/asm/atomic.h | 2 +- arch/m68k/include/asm/atomic_mm.h | 2 +- arch/m68k/include/asm/atomic_no.h | 2 +- arch/microblaze/include/asm/atomic.h | 2 +- arch/mips/include/asm/atomic.h | 2 +- arch/mn10300/include/asm/atomic.h | 2 +- arch/parisc/include/asm/atomic.h | 2 +- arch/powerpc/include/asm/atomic.h | 2 +- arch/s390/include/asm/atomic.h | 2 +- arch/sh/include/asm/atomic.h | 2 +- arch/sparc/include/asm/atomic_32.h | 2 +- arch/sparc/include/asm/atomic_64.h | 2 +- arch/x86/include/asm/atomic_32.h | 2 +- arch/x86/include/asm/atomic_64.h | 2 +- arch/xtensa/include/asm/atomic.h | 2 +- include/asm-generic/atomic-long.h | 258 +++++++++++++++++++++++++++++++++++ include/asm-generic/atomic.h | 258 ----------------------------------- include/asm-generic/bitops/atomic.h | 1 + 26 files changed, 282 insertions(+), 281 deletions(-) create mode 100644 include/asm-generic/atomic-long.h delete mode 100644 include/asm-generic/atomic.h (limited to 'include') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 62b363584b2..610dff44d94 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -256,5 +256,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() -#include +#include #endif /* _ALPHA_ATOMIC_H */ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 16b52f39798..9e07fe50702 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -249,6 +249,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() -#include +#include #endif #endif diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 31881510774..b131c27ddf5 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -196,6 +196,6 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __ASM_AVR32_ATOMIC_H */ diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 94b2a9b1945..7bbf44e4ddf 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -208,6 +208,6 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v) #define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) -#include +#include #endif /* __ARCH_BLACKFIN_ATOMIC __ */ diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index 5718dd8902a..a6aca819e9f 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h @@ -158,5 +158,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 296c35cfb20..0409d981fd3 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -194,5 +194,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#include +#include #endif /* _ASM_ATOMIC_H */ diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 833186c8dc3..33c8c0fa958 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -141,5 +141,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __ARCH_H8300_ATOMIC __ */ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index d37292bd987..88405cb0832 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -216,5 +216,5 @@ atomic64_add_negative (__s64 i, atomic64_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* _ASM_IA64_ATOMIC_H */ diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 2eed30f8408..63f0cf0f50d 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -314,5 +314,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, atomic_t *addr) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* _ASM_M32R_ATOMIC_H */ diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h index eb0ab9d4ee7..88b7af20a99 100644 --- a/arch/m68k/include/asm/atomic_mm.h +++ b/arch/m68k/include/asm/atomic_mm.h @@ -192,5 +192,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __ARCH_M68K_ATOMIC __ */ diff --git a/arch/m68k/include/asm/atomic_no.h b/arch/m68k/include/asm/atomic_no.h index 6bb674855a3..5674cb9449b 100644 --- a/arch/m68k/include/asm/atomic_no.h +++ b/arch/m68k/include/asm/atomic_no.h @@ -151,5 +151,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_dec_return(v) atomic_sub_return(1,(v)) #define atomic_inc_return(v) atomic_add_return(1,(v)) -#include +#include #endif /* __ARCH_M68KNOMMU_ATOMIC __ */ diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h index a448d94ab72..0de612ad7cb 100644 --- a/arch/microblaze/include/asm/atomic.h +++ b/arch/microblaze/include/asm/atomic.h @@ -118,6 +118,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* _ASM_MICROBLAZE_ATOMIC_H */ diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 1b332e15ab5..eb7f01cfd1a 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -793,6 +793,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #define smp_mb__before_atomic_inc() smp_llsc_mb() #define smp_mb__after_atomic_inc() smp_llsc_mb() -#include +#include #endif /* _ASM_ATOMIC_H */ diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index bc064825f9b..5bf5be9566d 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -151,7 +151,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __KERNEL__ */ #endif /* _ASM_ATOMIC_H */ diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index ada3e5364d8..7eeaff94436 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -338,6 +338,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #endif /* CONFIG_64BIT */ -#include +#include #endif /* _ASM_PARISC_ATOMIC_H_ */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index b401950f525..b7d2d07b6f9 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -472,6 +472,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #endif /* __powerpc64__ */ -#include +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ATOMIC_H_ */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index de432f2de2d..fca9dffcc66 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -275,6 +275,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() -#include +#include #endif /* __KERNEL__ */ #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index 6327ffbb199..a5647d0cd17 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -84,5 +84,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __ASM_SH_ATOMIC_H */ diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index bb91b1248cd..f0d343c3b95 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -161,5 +161,5 @@ static inline int __atomic24_sub(int i, atomic24_t *v) #endif /* !(__KERNEL__) */ -#include +#include #endif /* !(__ARCH_SPARC_ATOMIC__) */ diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index a0a70649269..f2e48009989 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -114,5 +114,5 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* !(__ARCH_SPARC64_ATOMIC__) */ diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 85b46fba422..c83d3148608 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -247,5 +247,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* _ASM_X86_ATOMIC_32_H */ diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 8c21731984d..0d636022000 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h @@ -455,5 +455,5 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* _ASM_X86_ATOMIC_64_H */ diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 67ad67bed8c..22d6dde4261 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -292,7 +292,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -#include +#include #endif /* __KERNEL__ */ #endif /* _XTENSA_ATOMIC_H */ diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h new file mode 100644 index 00000000000..76e27d66c05 --- /dev/null +++ b/include/asm-generic/atomic-long.h @@ -0,0 +1,258 @@ +#ifndef _ASM_GENERIC_ATOMIC_LONG_H +#define _ASM_GENERIC_ATOMIC_LONG_H +/* + * Copyright (C) 2005 Silicon Graphics, Inc. + * Christoph Lameter + * + * Allows to provide arch independent atomic definitions without the need to + * edit all arch specific atomic.h files. + */ + +#include + +/* + * Suppport for atomic_long_t + * + * Casts for parameters are avoided for existing atomic functions in order to + * avoid issues with cast-as-lval under gcc 4.x and other limitations that the + * macros of a platform may have. + */ + +#if BITS_PER_LONG == 64 + +typedef atomic64_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) + +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_sub(i, v); +} + +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_unless(v, a, u); +} + +#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) + +#define atomic_long_cmpxchg(l, old, new) \ + (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) +#define atomic_long_xchg(v, new) \ + (atomic64_xchg((atomic64_t *)(l), (new))) + +#else /* BITS_PER_LONG == 64 */ + +typedef atomic_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic_t *v = (atomic_t *)l; + + atomic_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_sub(i, v); +} + +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_unless(v, a, u); +} + +#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l)) + +#define atomic_long_cmpxchg(l, old, new) \ + (atomic_cmpxchg((atomic_t *)(l), (old), (new))) +#define atomic_long_xchg(v, new) \ + (atomic_xchg((atomic_t *)(v), (new))) + +#endif /* BITS_PER_LONG == 64 */ + +#endif /* _ASM_GENERIC_ATOMIC_LONG_H */ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h deleted file mode 100644 index 3673a13b670..00000000000 --- a/include/asm-generic/atomic.h +++ /dev/null @@ -1,258 +0,0 @@ -#ifndef _ASM_GENERIC_ATOMIC_H -#define _ASM_GENERIC_ATOMIC_H -/* - * Copyright (C) 2005 Silicon Graphics, Inc. - * Christoph Lameter - * - * Allows to provide arch independent atomic definitions without the need to - * edit all arch specific atomic.h files. - */ - -#include - -/* - * Suppport for atomic_long_t - * - * Casts for parameters are avoided for existing atomic functions in order to - * avoid issues with cast-as-lval under gcc 4.x and other limitations that the - * macros of a platform may have. - */ - -#if BITS_PER_LONG == 64 - -typedef atomic64_t atomic_long_t; - -#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) - -static inline long atomic_long_read(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_read(v); -} - -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_set(v, i); -} - -static inline void atomic_long_inc(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_inc(v); -} - -static inline void atomic_long_dec(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_dec(v); -} - -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_add(i, v); -} - -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_sub(i, v); -} - -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_unless(v, a, u); -} - -#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) - -#define atomic_long_cmpxchg(l, old, new) \ - (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(l), (new))) - -#else /* BITS_PER_LONG == 64 */ - -typedef atomic_t atomic_long_t; - -#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) -static inline long atomic_long_read(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_read(v); -} - -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - atomic_t *v = (atomic_t *)l; - - atomic_set(v, i); -} - -static inline void atomic_long_inc(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_inc(v); -} - -static inline void atomic_long_dec(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_dec(v); -} - -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_add(i, v); -} - -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - atomic_sub(i, v); -} - -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return atomic_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic_t *v = (atomic_t *)l; - - return (long)atomic_add_unless(v, a, u); -} - -#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l)) - -#define atomic_long_cmpxchg(l, old, new) \ - (atomic_cmpxchg((atomic_t *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (atomic_xchg((atomic_t *)(v), (new))) - -#endif /* BITS_PER_LONG == 64 */ - -#endif /* _ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 4657f3e410f..c8946465e63 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BITOPS_ATOMIC_H_ #include +#include #ifdef CONFIG_SMP #include -- cgit v1.2.3-70-g09d2 From 5b17e1cd8928ae65932758ce6478ac6d3e9a86b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:30 +0000 Subject: asm-generic: rename page.h and uaccess.h The current asm-generic/page.h only contains the get_order function, and asm-generic/uaccess.h only implements unaligned accesses. This renames the file to getorder.h and uaccess-unaligned.h to make room for new page.h and uaccess.h file that will be usable by all simple (e.g. nommu) architectures. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/alpha/include/asm/page.h | 2 +- arch/arm/include/asm/page.h | 2 +- arch/blackfin/include/asm/page.h | 2 +- arch/cris/include/asm/page.h | 2 +- arch/frv/include/asm/page.h | 2 +- arch/h8300/include/asm/page.h | 2 +- arch/m32r/include/asm/page.h | 2 +- arch/m68k/include/asm/page_mm.h | 2 +- arch/m68k/include/asm/page_no.h | 2 +- arch/microblaze/include/asm/page.h | 2 +- arch/mips/include/asm/page.h | 2 +- arch/parisc/include/asm/page.h | 2 +- arch/parisc/include/asm/uaccess.h | 2 +- arch/powerpc/include/asm/page_32.h | 2 +- arch/powerpc/include/asm/page_64.h | 2 +- arch/s390/include/asm/page.h | 2 +- arch/sh/include/asm/page.h | 2 +- arch/sparc/include/asm/page_32.h | 2 +- arch/sparc/include/asm/page_64.h | 2 +- arch/sparc/include/asm/uaccess_64.h | 2 +- arch/um/include/asm/page.h | 2 +- arch/x86/include/asm/page.h | 2 +- arch/xtensa/include/asm/page.h | 2 +- include/asm-generic/getorder.h | 24 ++++++++++++++++++++++++ include/asm-generic/page.h | 24 ------------------------ include/asm-generic/uaccess-unaligned.h | 26 ++++++++++++++++++++++++++ include/asm-generic/uaccess.h | 26 -------------------------- 27 files changed, 73 insertions(+), 73 deletions(-) create mode 100644 include/asm-generic/getorder.h delete mode 100644 include/asm-generic/page.h create mode 100644 include/asm-generic/uaccess-unaligned.h delete mode 100644 include/asm-generic/uaccess.h (limited to 'include') diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 0995f9d1341..07af062544f 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -93,6 +93,6 @@ typedef struct page *pgtable_t; VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include #endif /* _ALPHA_PAGE_H */ diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 7b522770f29..be962c1349c 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -202,6 +202,6 @@ typedef struct page *pgtable_t; (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#include +#include #endif diff --git a/arch/blackfin/include/asm/page.h b/arch/blackfin/include/asm/page.h index 344f6a8c1f2..3ea2016a1d4 100644 --- a/arch/blackfin/include/asm/page.h +++ b/arch/blackfin/include/asm/page.h @@ -81,7 +81,7 @@ extern unsigned long memory_end; #define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ ((void *)(kaddr) < (void *)memory_end)) -#include +#include #endif /* __ASSEMBLY__ */ diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h index f3fdbd09c34..be45ee366be 100644 --- a/arch/cris/include/asm/page.h +++ b/arch/cris/include/asm/page.h @@ -68,7 +68,7 @@ typedef struct page *pgtable_t; VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include #endif /* _CRIS_PAGE_H */ diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h index bd9c220094c..25c6a500235 100644 --- a/arch/frv/include/asm/page.h +++ b/arch/frv/include/asm/page.h @@ -73,6 +73,6 @@ extern unsigned long max_pfn; #endif /* __ASSEMBLY__ */ #include -#include +#include #endif /* _ASM_PAGE_H */ diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h index 0b6acf0b03a..837381a2df4 100644 --- a/arch/h8300/include/asm/page.h +++ b/arch/h8300/include/asm/page.h @@ -73,6 +73,6 @@ extern unsigned long memory_end; #endif /* __ASSEMBLY__ */ #include -#include +#include #endif /* _H8300_PAGE_H */ diff --git a/arch/m32r/include/asm/page.h b/arch/m32r/include/asm/page.h index c9333089fe1..11777f7a562 100644 --- a/arch/m32r/include/asm/page.h +++ b/arch/m32r/include/asm/page.h @@ -82,6 +82,6 @@ typedef struct page *pgtable_t; #define devmem_is_allowed(x) 1 #include -#include +#include #endif /* _ASM_M32R_PAGE_H */ diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index a34b8bad784..d009f3ea39a 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -223,6 +223,6 @@ static inline __attribute_const__ int __virt_to_node_shift(void) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#include +#include #endif /* _M68K_PAGE_H */ diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index 3a1ede4544c..9aa3f90f485 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -72,6 +72,6 @@ extern unsigned long memory_end; #endif /* __ASSEMBLY__ */ -#include +#include #endif /* _M68KNOMMU_PAGE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 7238dcfcc51..962c210e5b9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -135,6 +135,6 @@ extern unsigned int memory_size; #endif /* __KERNEL__ */ #include -#include +#include #endif /* _ASM_MICROBLAZE_PAGE_H */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 9f946e4ca05..72c80d2034c 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -189,6 +189,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define CAC_ADDR(addr) ((addr) - UNCAC_BASE + PAGE_OFFSET) #include -#include +#include #endif /* _ASM_PAGE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 7bc5125d7d4..a84cc1f925f 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -159,6 +159,6 @@ extern int npmem_ranges; VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include #endif /* _PARISC_PAGE_H */ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index cd4c0b2a8e7..7cf799d70b4 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index a0e3f6e6b4e..bd0849dbcaa 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -41,7 +41,7 @@ extern void clear_pages(void *page, int order); static inline void clear_page(void *page) { clear_pages(page, 0); } extern void copy_page(void *to, void *from); -#include +#include #define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) #define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 043bfdfe4f7..5817a3b747e 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -180,6 +180,6 @@ do { \ (test_thread_flag(TIF_32BIT) ? \ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) -#include +#include #endif /* _ASM_POWERPC_PAGE_64_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 32e8f6aa438..3e3594d01f8 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -150,7 +150,7 @@ void arch_alloc_page(struct page *page, int order); VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include #define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 9c6d21ec024..49592c780a6 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -163,7 +163,7 @@ typedef struct page *pgtable_t; VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include /* vDSO support */ #ifdef CONFIG_VSYSCALL diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index d1806edc095..f72080bdda9 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -152,6 +152,6 @@ extern unsigned long pfn_base; VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include +#include #endif /* _SPARC_PAGE_H */ diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 4274ed13ddb..f0d09b40103 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -132,6 +132,6 @@ typedef struct page *pgtable_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#include +#include #endif /* _SPARC64_PAGE_H */ diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index c64e767a3e4..a38c0323891 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #endif #ifndef __ASSEMBLY__ diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 55f28a0bae6..4cc9b6cf480 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -116,7 +116,7 @@ extern unsigned long uml_physmem; #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) #include -#include +#include #endif /* __ASSEMBLY__ */ #endif /* __UM_PAGE_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 89ed9d70b0a..625c3f0e741 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -56,7 +56,7 @@ extern bool __virt_addr_valid(unsigned long kaddr); #endif /* __ASSEMBLY__ */ #include -#include +#include #define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 17e0c5383b1..161bb89e98c 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -129,7 +129,7 @@ static inline __attribute_const__ int get_order(unsigned long size) #else -# include +# include #endif diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h new file mode 100644 index 00000000000..67e7245dc9b --- /dev/null +++ b/include/asm-generic/getorder.h @@ -0,0 +1,24 @@ +#ifndef __ASM_GENERIC_GETORDER_H +#define __ASM_GENERIC_GETORDER_H + +#ifndef __ASSEMBLY__ + +#include + +/* Pure 2^n version of get_order */ +static inline __attribute_const__ int get_order(unsigned long size) +{ + int order; + + size = (size - 1) >> (PAGE_SHIFT - 1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GENERIC_GETORDER_H */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h deleted file mode 100644 index 14db733b8e6..00000000000 --- a/include/asm-generic/page.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ASM_GENERIC_PAGE_H -#define _ASM_GENERIC_PAGE_H - -#ifndef __ASSEMBLY__ - -#include - -/* Pure 2^n version of get_order */ -static __inline__ __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_GENERIC_PAGE_H */ diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h new file mode 100644 index 00000000000..67deb898f0c --- /dev/null +++ b/include/asm-generic/uaccess-unaligned.h @@ -0,0 +1,26 @@ +#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H +#define __ASM_GENERIC_UACCESS_UNALIGNED_H + +/* + * This macro should be used instead of __get_user() when accessing + * values at locations that are not known to be aligned. + */ +#define __get_user_unaligned(x, ptr) \ +({ \ + __typeof__ (*(ptr)) __x; \ + __copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0; \ + (x) = __x; \ +}) + + +/* + * This macro should be used instead of __put_user() when accessing + * values at locations that are not known to be aligned. + */ +#define __put_user_unaligned(x, ptr) \ +({ \ + __typeof__ (*(ptr)) __x = (x); \ + __copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0; \ +}) + +#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */ diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h deleted file mode 100644 index 549cb3a1640..00000000000 --- a/include/asm-generic/uaccess.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _ASM_GENERIC_UACCESS_H_ -#define _ASM_GENERIC_UACCESS_H_ - -/* - * This macro should be used instead of __get_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __get_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x; \ - __copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0; \ - (x) = __x; \ -}) - - -/* - * This macro should be used instead of __put_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __put_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x = (x); \ - __copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0; \ -}) - -#endif /* _ASM_GENERIC_UACCESS_H */ -- cgit v1.2.3-70-g09d2 From 3aef392822e1a42e80077a332ea2efdfd8a4248a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 17 May 2009 21:00:05 +0000 Subject: asm-generic: make get_rtc_time overridable Evidently, set_rtc_time is supposed to be overridable by architectures that define their own version, but unfortunately, get_rtc_ss would in that case still use the generic version. This makes get_rtc_ss call the real set_rtc_time to let architectures define their own version. The change should fix the "Extended RTC operation" on Alpha, which uses the incorrect get_rtc_ss call. It also allows PowerPC to use the asm-generic/rtc.h file in the future. Cc: Richard Henderson Cc: linux-alpha@vger.kernel.org Cc: Tom Rini Cc: rtc-linux@googlegroups.com Cc: Alessandro Zummo Cc: Paul Gortmaker Signed-off-by: Arnd Bergmann --- include/asm-generic/rtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index 763e3b060f4..fa86f240c87 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -202,7 +202,7 @@ static inline unsigned int get_rtc_ss(void) { struct rtc_time h; - __get_rtc_time(&h); + get_rtc_time(&h); return h.tm_sec; } -- cgit v1.2.3-70-g09d2 From d7c4f1b78afeedfc22b1756fcdc1acbe84284d74 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:31 +0000 Subject: asm-generic: make pci.h usable directly Some generic code is using the horribly misnamed PCI_DMA_BUS_IS_PHYS from asm/pci.h. This makes sure that an architecture without PCI support does not have to define this itself but can rely on the asm-generic version. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/frv/include/asm/pci.h | 13 +++---------- arch/m32r/include/asm/pci.h | 2 -- include/asm-generic/pci.h | 8 ++++++++ 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 585d9b49949..3ce227ba774 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -10,8 +10,8 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef ASM_PCI_H -#define ASM_PCI_H +#ifndef _ASM_FRV_PCI_H +#define _ASM_FRV_PCI_H #include #include @@ -43,12 +43,6 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, /* Return the index of the PCI controller for device PDEV. */ #define pci_controller_num(PDEV) (0) -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - /* pci_unmap_{page,single} is a nop so... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) @@ -114,5 +108,4 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, sg_dma_address(&sg[i])+sg_dma_len(&sg[i])); } - -#endif +#endif /* _ASM_FRV_PCI_H */ diff --git a/arch/m32r/include/asm/pci.h b/arch/m32r/include/asm/pci.h index fe785d167db..07d3834c6de 100644 --- a/arch/m32r/include/asm/pci.h +++ b/arch/m32r/include/asm/pci.h @@ -3,6 +3,4 @@ #include -#define PCI_DMA_BUS_IS_PHYS (1) - #endif /* _ASM_M32R_PCI_H */ diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h index c36a77d3bf4..515c6e2e321 100644 --- a/include/asm-generic/pci.h +++ b/include/asm-generic/pci.h @@ -52,4 +52,12 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */ +/* + * By default, assume that no iommu is in use and that the PCI + * space is mapped to address physical 0. + */ +#ifndef PCI_DMA_BUS_IS_PHYS +#define PCI_DMA_BUS_IS_PHYS (1) #endif + +#endif /* _ASM_GENERIC_PCI_H */ -- cgit v1.2.3-70-g09d2 From 9858c60cc2d33b18367b2bc6947e3ea23db26ccb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:32 +0000 Subject: asm-generic: make bitops.h usable bitops.h apparently suffered from some level of bitrot, it was missing the smp_mb__{before,after}_clear_bit functions, and included other headers in an invalid order. This changes the file so that new architectures can use it out of the box. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/bitops.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index c9f369c4bd7..a54f4421a24 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -1,19 +1,29 @@ -#ifndef _ASM_GENERIC_BITOPS_H_ -#define _ASM_GENERIC_BITOPS_H_ +#ifndef __ASM_GENERIC_BITOPS_H +#define __ASM_GENERIC_BITOPS_H /* * For the benefit of those who are trying to port Linux to another * architecture, here are some C-language equivalents. You should * recode these in the native assembly language, if at all possible. - * + * * C language equivalents written by Theodore Ts'o, 9/26/92 */ -#include -#include +#include +#include + +/* + * clear_bit may not imply a memory barrier + */ +#ifndef smp_mb__before_clear_bit +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() +#endif + #include #include #include +#include #include #include @@ -26,8 +36,10 @@ #include #include +#include +#include #include #include #include -#endif /* _ASM_GENERIC_BITOPS_H */ +#endif /* __ASM_GENERIC_BITOPS_H */ -- cgit v1.2.3-70-g09d2 From aafe4dbed0bf6cbdb2e9f03e1d42f8a540d8541d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:33 +0000 Subject: asm-generic: add generic versions of common headers These are all kernel internal interfaces that get copied around a lot. In most cases, architectures can provide their own optimized versions, but these generic versions can work as well. I have tried to use the most common contents of each header to allow existing architectures to migrate easily. Thanks to Remis for suggesting a number of cleanups. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/bugs.h | 10 +++ include/asm-generic/current.h | 9 +++ include/asm-generic/delay.h | 9 +++ include/asm-generic/fb.h | 12 +++ include/asm-generic/hardirq.h | 34 ++++++++ include/asm-generic/irq.h | 18 +++++ include/asm-generic/irqflags.h | 72 +++++++++++++++++ include/asm-generic/kmap_types.h | 32 ++++++++ include/asm-generic/linkage.h | 8 ++ include/asm-generic/module.h | 22 ++++++ include/asm-generic/mutex.h | 9 +++ include/asm-generic/scatterlist.h | 43 ++++++++++ include/asm-generic/spinlock.h | 11 +++ include/asm-generic/string.h | 10 +++ include/asm-generic/syscalls.h | 60 ++++++++++++++ include/asm-generic/system.h | 161 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/unaligned.h | 30 +++++++ include/asm-generic/user.h | 8 ++ 18 files changed, 558 insertions(+) create mode 100644 include/asm-generic/bugs.h create mode 100644 include/asm-generic/current.h create mode 100644 include/asm-generic/delay.h create mode 100644 include/asm-generic/fb.h create mode 100644 include/asm-generic/hardirq.h create mode 100644 include/asm-generic/irq.h create mode 100644 include/asm-generic/irqflags.h create mode 100644 include/asm-generic/kmap_types.h create mode 100644 include/asm-generic/linkage.h create mode 100644 include/asm-generic/module.h create mode 100644 include/asm-generic/mutex.h create mode 100644 include/asm-generic/scatterlist.h create mode 100644 include/asm-generic/spinlock.h create mode 100644 include/asm-generic/string.h create mode 100644 include/asm-generic/syscalls.h create mode 100644 include/asm-generic/system.h create mode 100644 include/asm-generic/unaligned.h create mode 100644 include/asm-generic/user.h (limited to 'include') diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h new file mode 100644 index 00000000000..6c4f62ea714 --- /dev/null +++ b/include/asm-generic/bugs.h @@ -0,0 +1,10 @@ +#ifndef __ASM_GENERIC_BUGS_H +#define __ASM_GENERIC_BUGS_H +/* + * This file is included by 'init/main.c' to check for + * architecture-dependent bugs. + */ + +static inline void check_bugs(void) { } + +#endif /* __ASM_GENERIC_BUGS_H */ diff --git a/include/asm-generic/current.h b/include/asm-generic/current.h new file mode 100644 index 00000000000..5e86f6ae7ca --- /dev/null +++ b/include/asm-generic/current.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_CURRENT_H +#define __ASM_GENERIC_CURRENT_H + +#include + +#define get_current() (current_thread_info()->task) +#define current get_current() + +#endif /* __ASM_GENERIC_CURRENT_H */ diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h new file mode 100644 index 00000000000..4586fec75dd --- /dev/null +++ b/include/asm-generic/delay.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_DELAY_H +#define __ASM_GENERIC_DELAY_H + +extern void __udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) __udelay(n) + +#endif /* __ASM_GENERIC_DELAY_H */ diff --git a/include/asm-generic/fb.h b/include/asm-generic/fb.h new file mode 100644 index 00000000000..fe8ca7fcea0 --- /dev/null +++ b/include/asm-generic/fb.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_FB_H_ +#define __ASM_GENERIC_FB_H_ +#include + +#define fb_pgprotect(...) do {} while (0) + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* __ASM_GENERIC_FB_H_ */ diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h new file mode 100644 index 00000000000..3d5d2c906ab --- /dev/null +++ b/include/asm-generic/hardirq.h @@ -0,0 +1,34 @@ +#ifndef __ASM_GENERIC_HARDIRQ_H +#define __ASM_GENERIC_HARDIRQ_H + +#include +#include +#include + +typedef struct { + unsigned long __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +#ifndef HARDIRQ_BITS +#define HARDIRQ_BITS 8 +#endif + +/* + * The hardirq mask has to be large enough to have + * space for potentially all IRQ sources in the system + * nesting on a single CPU: + */ +#if (1 << HARDIRQ_BITS) < NR_IRQS +# error HARDIRQ_BITS is too low! +#endif + +#ifndef ack_bad_irq +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} +#endif + +#endif /* __ASM_GENERIC_HARDIRQ_H */ diff --git a/include/asm-generic/irq.h b/include/asm-generic/irq.h new file mode 100644 index 00000000000..b90ec0bc485 --- /dev/null +++ b/include/asm-generic/irq.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_IRQ_H +#define __ASM_GENERIC_IRQ_H + +/* + * NR_IRQS is the upper bound of how many interrupts can be handled + * in the platform. It is used to size the static irq_map array, + * so don't make it too big. + */ +#ifndef NR_IRQS +#define NR_IRQS 64 +#endif + +static inline int irq_canonicalize(int irq) +{ + return irq; +} + +#endif /* __ASM_GENERIC_IRQ_H */ diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h new file mode 100644 index 00000000000..9aebf618275 --- /dev/null +++ b/include/asm-generic/irqflags.h @@ -0,0 +1,72 @@ +#ifndef __ASM_GENERIC_IRQFLAGS_H +#define __ASM_GENERIC_IRQFLAGS_H + +/* + * All architectures should implement at least the first two functions, + * usually inline assembly will be the best way. + */ +#ifndef RAW_IRQ_DISABLED +#define RAW_IRQ_DISABLED 0 +#define RAW_IRQ_ENABLED 1 +#endif + +/* read interrupt enabled status */ +#ifndef __raw_local_save_flags +unsigned long __raw_local_save_flags(void); +#endif + +/* set interrupt enabled status */ +#ifndef raw_local_irq_restore +void raw_local_irq_restore(unsigned long flags); +#endif + +/* get status and disable interrupts */ +#ifndef __raw_local_irq_save +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags; + flags = __raw_local_save_flags(); + raw_local_irq_restore(RAW_IRQ_DISABLED); + return flags; +} +#endif + +/* test flags */ +#ifndef raw_irqs_disabled_flags +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return flags == RAW_IRQ_DISABLED; +} +#endif + +/* unconditionally enable interrupts */ +#ifndef raw_local_irq_enable +static inline void raw_local_irq_enable(void) +{ + raw_local_irq_restore(RAW_IRQ_ENABLED); +} +#endif + +/* unconditionally disable interrupts */ +#ifndef raw_local_irq_disable +static inline void raw_local_irq_disable(void) +{ + raw_local_irq_restore(RAW_IRQ_DISABLED); +} +#endif + +/* test hardware interrupt enable bit */ +#ifndef raw_irqs_disabled +static inline int raw_irqs_disabled(void) +{ + return raw_irqs_disabled_flags(__raw_local_save_flags()); +} +#endif + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASM_GENERIC_IRQFLAGS_H */ diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h new file mode 100644 index 00000000000..58c33055c30 --- /dev/null +++ b/include/asm-generic/kmap_types.h @@ -0,0 +1,32 @@ +#ifndef _ASM_GENERIC_KMAP_TYPES_H +#define _ASM_GENERIC_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_SYNC_ICACHE, +D(14) KM_SYNC_DCACHE, +D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ +D(16) KM_TYPE_NR +}; + +#undef D + +#endif diff --git a/include/asm-generic/linkage.h b/include/asm-generic/linkage.h new file mode 100644 index 00000000000..fef7a01e541 --- /dev/null +++ b/include/asm-generic/linkage.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_LINKAGE_H +#define __ASM_GENERIC_LINKAGE_H +/* + * linux/linkage.h provides reasonable defaults. + * an architecture can override them by providing its own version. + */ + +#endif /* __ASM_GENERIC_LINKAGE_H */ diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h new file mode 100644 index 00000000000..ed5b44de4c9 --- /dev/null +++ b/include/asm-generic/module.h @@ -0,0 +1,22 @@ +#ifndef __ASM_GENERIC_MODULE_H +#define __ASM_GENERIC_MODULE_H + +/* + * Many architectures just need a simple module + * loader without arch specific data. + */ +struct mod_arch_specific +{ +}; + +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr +#else +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr +#endif + +#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h new file mode 100644 index 00000000000..fe91ab50279 --- /dev/null +++ b/include/asm-generic/mutex.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_MUTEX_H +#define __ASM_GENERIC_MUTEX_H +/* + * Pull in the generic implementation for the mutex fastpath, + * which is a reasonable default on many architectures. + */ + +#include +#endif /* __ASM_GENERIC_MUTEX_H */ diff --git a/include/asm-generic/scatterlist.h b/include/asm-generic/scatterlist.h new file mode 100644 index 00000000000..8b9454496a7 --- /dev/null +++ b/include/asm-generic/scatterlist.h @@ -0,0 +1,43 @@ +#ifndef __ASM_GENERIC_SCATTERLIST_H +#define __ASM_GENERIC_SCATTERLIST_H + +#include + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifndef sg_dma_len +/* + * Normally, you have an iommu on 64 bit machines, but not on 32 bit + * machines. Architectures that are differnt should override this. + */ +#if __BITS_PER_LONG == 64 +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif /* 64 bit */ +#endif /* sg_dma_len */ + +#ifndef ISA_DMA_THRESHOLD +#define ISA_DMA_THRESHOLD (~0UL) +#endif + +#define ARCH_HAS_SG_CHAIN + +#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h new file mode 100644 index 00000000000..1547a03ac50 --- /dev/null +++ b/include/asm-generic/spinlock.h @@ -0,0 +1,11 @@ +#ifndef __ASM_GENERIC_SPINLOCK_H +#define __ASM_GENERIC_SPINLOCK_H +/* + * You need to implement asm/spinlock.h for SMP support. The generic + * version does not handle SMP. + */ +#ifdef CONFIG_SMP +#error need an architecture specific asm/spinlock.h +#endif + +#endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/string.h b/include/asm-generic/string.h new file mode 100644 index 00000000000..de5e0201459 --- /dev/null +++ b/include/asm-generic/string.h @@ -0,0 +1,10 @@ +#ifndef __ASM_GENERIC_STRING_H +#define __ASM_GENERIC_STRING_H +/* + * The kernel provides all required functions in lib/string.c + * + * Architectures probably want to provide at least their own optimized + * memcpy and memset functions though. + */ + +#endif /* __ASM_GENERIC_STRING_H */ diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h new file mode 100644 index 00000000000..df84e3b0455 --- /dev/null +++ b/include/asm-generic/syscalls.h @@ -0,0 +1,60 @@ +#ifndef __ASM_GENERIC_SYSCALLS_H +#define __ASM_GENERIC_SYSCALLS_H + +#include +#include + +/* + * Calling conventions for these system calls can differ, so + * it's possible to override them. + */ +#ifndef sys_clone +asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, + struct pt_regs *regs); +#endif + +#ifndef sys_fork +asmlinkage long sys_fork(struct pt_regs *regs); +#endif + +#ifndef sys_vfork +asmlinkage long sys_vfork(struct pt_regs *regs); +#endif + +#ifndef sys_execve +asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, + char __user * __user *envp, struct pt_regs *regs); +#endif + +#ifndef sys_mmap2 +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +#endif + +#ifndef sys_mmap +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t pgoff); +#endif + +#ifndef sys_sigaltstack +asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +#endif + +#ifndef sys_rt_sigreturn +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs); +#endif + +#ifndef sys_rt_sigsuspend +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +#endif + +#ifndef sys_rt_sigaction +asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, + struct sigaction __user *oact, size_t sigsetsize); +#endif + +#endif /* __ASM_GENERIC_SYSCALLS_H */ diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h new file mode 100644 index 00000000000..efa403b5e12 --- /dev/null +++ b/include/asm-generic/system.h @@ -0,0 +1,161 @@ +/* Generic system definitions, based on MN10300 definitions. + * + * It should be possible to use these on really simple architectures, + * but it serves more as a starting point for new ports. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_SYSTEM_H +#define __ASM_GENERIC_SYSTEM_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include + +#include + +struct task_struct; + +/* context switching is now performed out-of-line in switch_to.S */ +extern struct task_struct *__switch_to(struct task_struct *, + struct task_struct *); +#define switch_to(prev, next, last) \ + do { \ + ((last) = __switch_to((prev), (next))); \ + } while (0) + +#define arch_align_stack(x) (x) + +#define nop() asm volatile ("nop") + +#endif /* !__ASSEMBLY__ */ + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * This implementation only contains a compiler barrier. + */ + +#define mb() asm volatile ("": : :"memory") +#define rmb() mb() +#define wmb() asm volatile ("": : :"memory") + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + +#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#define read_barrier_depends() do {} while (0) +#define smp_read_barrier_depends() do {} while (0) + +/* + * we make sure local_irq_enable() doesn't cause priority inversion + */ +#ifndef __ASSEMBLY__ + +/* This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static inline +unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned long ret, flags; + + switch (size) { + case 1: +#ifdef __xchg_u8 + return __xchg_u8(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u8 *)ptr; + *(volatile u8 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u8 */ + + case 2: +#ifdef __xchg_u16 + return __xchg_u16(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u16 *)ptr; + *(volatile u16 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u16 */ + + case 4: +#ifdef __xchg_u32 + return __xchg_u32(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u32 *)ptr; + *(volatile u32 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u32 */ + +#ifdef CONFIG_64BIT + case 8: +#ifdef __xchg_u64 + return __xchg_u64(x, ptr); +#else + local_irq_save(flags); + ret = *(volatile u64 *)ptr; + *(volatile u64 *)ptr = x; + local_irq_restore(flags); + return ret; +#endif /* __xchg_u64 */ +#endif /* CONFIG_64BIT */ + + default: + __xchg_called_with_bad_pointer(); + return x; + } +} + +#define xchg(ptr, x) \ + ((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) + +static inline unsigned long __cmpxchg(volatile unsigned long *m, + unsigned long old, unsigned long new) +{ + unsigned long retval; + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); + return retval; +} + +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \ + (unsigned long)(o), \ + (unsigned long)(n))) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_SYSTEM_H */ diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h new file mode 100644 index 00000000000..03cf5936bad --- /dev/null +++ b/include/asm-generic/unaligned.h @@ -0,0 +1,30 @@ +#ifndef __ASM_GENERIC_UNALIGNED_H +#define __ASM_GENERIC_UNALIGNED_H + +/* + * This is the most generic implementation of unaligned accesses + * and should work almost anywhere. + * + * If an architecture can handle unaligned accesses in hardware, + * it may want to use the linux/unaligned/access_ok.h implementation + * instead. + */ +#include + +#if defined(__LITTLE_ENDIAN) +# include +# include +# include +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#elif defined(__BIG_ENDIAN) +# include +# include +# include +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#else +# error need to define endianess +#endif + +#endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/include/asm-generic/user.h b/include/asm-generic/user.h new file mode 100644 index 00000000000..8b9c3c960ae --- /dev/null +++ b/include/asm-generic/user.h @@ -0,0 +1,8 @@ +#ifndef __ASM_GENERIC_USER_H +#define __ASM_GENERIC_USER_H +/* + * This file may define a 'struct user' structure. However, it it only + * used for a.out file, which are not supported on new architectures. + */ + +#endif /* __ASM_GENERIC_USER_H */ -- cgit v1.2.3-70-g09d2 From ae49e807951d5e26767bc55d1dc29671c596c450 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:34 +0000 Subject: asm-generic: add legacy I/O header files The dma.h, hw_irq.h, serial.h and timex.h files originally described PC-style i8237, i8259A, i8250, i8253 and i8255 chips as well as the VGA style text mode graphics. Modern architectures live happily without these specific interfaces, but a few definitions from these headers keep getting used in common code. The new generic headers are what most architectures use anyway nowadays, just implementing the minimal definitions. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/dma.h | 15 +++++++++++++++ include/asm-generic/hw_irq.h | 9 +++++++++ include/asm-generic/parport.h | 23 +++++++++++++++++++++++ include/asm-generic/serial.h | 13 +++++++++++++ include/asm-generic/timex.h | 22 ++++++++++++++++++++++ include/asm-generic/vga.h | 24 ++++++++++++++++++++++++ 6 files changed, 106 insertions(+) create mode 100644 include/asm-generic/dma.h create mode 100644 include/asm-generic/hw_irq.h create mode 100644 include/asm-generic/parport.h create mode 100644 include/asm-generic/serial.h create mode 100644 include/asm-generic/timex.h create mode 100644 include/asm-generic/vga.h (limited to 'include') diff --git a/include/asm-generic/dma.h b/include/asm-generic/dma.h new file mode 100644 index 00000000000..9dfc3a7f36d --- /dev/null +++ b/include/asm-generic/dma.h @@ -0,0 +1,15 @@ +#ifndef __ASM_GENERIC_DMA_H +#define __ASM_GENERIC_DMA_H +/* + * This file traditionally describes the i8237 PC style DMA controller. + * Most architectures don't have these any more and can get the minimal + * implementation from kernel/dma.c by not defining MAX_DMA_CHANNELS. + * + * Some code relies on seeing MAX_DMA_ADDRESS though. + */ +#define MAX_DMA_ADDRESS PAGE_OFFSET + +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); + +#endif /* __ASM_GENERIC_DMA_H */ diff --git a/include/asm-generic/hw_irq.h b/include/asm-generic/hw_irq.h new file mode 100644 index 00000000000..89036d7b40e --- /dev/null +++ b/include/asm-generic/hw_irq.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_HW_IRQ_H +#define __ASM_GENERIC_HW_IRQ_H +/* + * hw_irq.h has internal declarations for the low-level interrupt + * controller, like the original i8259A. + * In general, this is not needed for new architectures. + */ + +#endif /* __ASM_GENERIC_HW_IRQ_H */ diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h new file mode 100644 index 00000000000..40528cb977e --- /dev/null +++ b/include/asm-generic/parport.h @@ -0,0 +1,23 @@ +#ifndef __ASM_GENERIC_PARPORT_H +#define __ASM_GENERIC_PARPORT_H + +/* + * An ISA bus may have i8255 parallel ports at well-known + * locations in the I/O space, which are scanned by + * parport_pc_find_isa_ports. + * + * Without ISA support, the driver will only attach + * to devices on the PCI bus. + */ + +static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ +#ifdef CONFIG_ISA + return parport_pc_find_isa_ports(autoirq, autodma); +#else + return 0; +#endif +} + +#endif /* __ASM_GENERIC_PARPORT_H */ diff --git a/include/asm-generic/serial.h b/include/asm-generic/serial.h new file mode 100644 index 00000000000..5e291090fe0 --- /dev/null +++ b/include/asm-generic/serial.h @@ -0,0 +1,13 @@ +#ifndef __ASM_GENERIC_SERIAL_H +#define __ASM_GENERIC_SERIAL_H + +/* + * This should not be an architecture specific #define, oh well. + * + * Traditionally, it just describes i8250 and related serial ports + * that have this clock rate. + */ + +#define BASE_BAUD (1843200 / 16) + +#endif /* __ASM_GENERIC_SERIAL_H */ diff --git a/include/asm-generic/timex.h b/include/asm-generic/timex.h new file mode 100644 index 00000000000..b2243cb8d6f --- /dev/null +++ b/include/asm-generic/timex.h @@ -0,0 +1,22 @@ +#ifndef __ASM_GENERIC_TIMEX_H +#define __ASM_GENERIC_TIMEX_H + +/* + * If you have a cycle counter, return the value here. + */ +typedef unsigned long cycles_t; +#ifndef get_cycles +static inline cycles_t get_cycles(void) +{ + return 0; +} +#endif + +/* + * Architectures are encouraged to implement read_current_timer + * and define this in order to avoid the expensive delay loop + * calibration during boot. + */ +#undef ARCH_HAS_READ_CURRENT_TIMER + +#endif /* __ASM_GENERIC_TIMEX_H */ diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h new file mode 100644 index 00000000000..36c8ff52016 --- /dev/null +++ b/include/asm-generic/vga.h @@ -0,0 +1,24 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ +#ifndef __ASM_GENERIC_VGA_H +#define __ASM_GENERIC_VGA_H + +/* + * On most architectures that support VGA, we can just + * recalculate addresses and then access the videoram + * directly without any black magic. + * + * Everyone else needs to ioremap the address and use + * proper I/O accesses. + */ +#ifndef VGA_MAP_MEM +#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) +#endif + +#define vga_readb(x) (*(x)) +#define vga_writeb(x, y) (*(y) = (x)) + +#endif /* _ASM_GENERIC_VGA_H */ -- cgit v1.2.3-70-g09d2 From 3f7e212df82ca0459d44c91d9e019efd1b5f936c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:35 +0000 Subject: asm-generic: add generic atomic.h and io.h atomic.h and io.h are based on the mn10300 architecture, which is already pretty generic and can be used by other architectures that do not have hardware support for atomic operations or out-of-order I/O access. Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 165 ++++++++++++++++++++++++ include/asm-generic/io.h | 300 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 include/asm-generic/atomic.h create mode 100644 include/asm-generic/io.h (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h new file mode 100644 index 00000000000..c99c64dc5f3 --- /dev/null +++ b/include/asm-generic/atomic.h @@ -0,0 +1,165 @@ +/* + * Generic C implementation of atomic counter operations + * Originally implemented for MN10300. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_ATOMIC_H +#define __ASM_GENERIC_ATOMIC_H + +#ifdef CONFIG_SMP +#error not SMP safe +#endif + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#define ATOMIC_INIT(i) { (i) } + +#ifdef __KERNEL__ + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v, i) (((v)->counter) = (i)) + +#include + +/** + * atomic_add_return - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns the result + * Note that the guaranteed useful range of an atomic_t is only 24 bits. + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + unsigned long flags; + int temp; + + local_irq_save(flags); + temp = v->counter; + temp += i; + v->counter = temp; + local_irq_restore(flags); + + return temp; +} + +/** + * atomic_sub_return - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns the result + * Note that the guaranteed useful range of an atomic_t is only 24 bits. + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + unsigned long flags; + int temp; + + local_irq_save(flags); + temp = v->counter; + temp -= i; + v->counter = temp; + local_irq_restore(flags); + + return temp; +} + +static inline int atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + atomic_add_return(i, v); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + atomic_sub_return(i, v); +} + +static inline void atomic_inc(atomic_t *v) +{ + atomic_add_return(1, v); +} + +static inline void atomic_dec(atomic_t *v) +{ + atomic_sub_return(1, v); +} + +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) + +#define atomic_add_unless(v, a, u) \ +({ \ + int c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) +{ + unsigned long flags; + + mask = ~mask; + local_irq_save(flags); + *addr &= mask; + local_irq_restore(flags); +} + +#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) + +/* Assume that atomic operations are already serializing */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include + +#endif /* __KERNEL__ */ +#endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h new file mode 100644 index 00000000000..bcee6365dca --- /dev/null +++ b/include/asm-generic/io.h @@ -0,0 +1,300 @@ +/* Generic I/O port emulation, based on MN10300 code + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __ASM_GENERIC_IO_H +#define __ASM_GENERIC_IO_H + +#include /* I/O is all done through memory accesses */ +#include +#include + +#ifdef CONFIG_GENERIC_IOMAP +#include +#endif + +#define mmiowb() do {} while (0) + +/*****************************************************************************/ +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the simple architectures, we just read/write the + * memory location directly. + */ +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *) addr; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *) addr; +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *) addr; +} + +#define readb __raw_readb +#define readw(addr) __le16_to_cpu(__raw_readw(addr)) +#define readl(addr) __le32_to_cpu(__raw_readl(addr)) + +static inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + *(volatile u8 __force *) addr = b; +} + +static inline void __raw_writew(u16 b, volatile void __iomem *addr) +{ + *(volatile u16 __force *) addr = b; +} + +static inline void __raw_writel(u32 b, volatile void __iomem *addr) +{ + *(volatile u32 __force *) addr = b; +} + +#define writeb __raw_writeb +#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr) +#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr) + +#ifdef CONFIG_64BIT +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *) addr; +} +#define readq(addr) __le64_to_cpu(__raw_readq(addr)) + +static inline void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + *(volatile u64 __force *) addr = b; +} +#define writeq(b,addr) __raw_writeq(__cpu_to_le64(b),addr) +#endif + +/*****************************************************************************/ +/* + * traditional input/output functions + */ + +static inline u8 inb(unsigned long addr) +{ + return readb((volatile void __iomem *) addr); +} + +static inline u16 inw(unsigned long addr) +{ + return readw((volatile void __iomem *) addr); +} + +static inline u32 inl(unsigned long addr) +{ + return readl((volatile void __iomem *) addr); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, (volatile void __iomem *) addr); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, (volatile void __iomem *) addr); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, (volatile void __iomem *) addr); +} + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + if (count) { + u8 *buf = buffer; + do { + u8 x = inb(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + if (count) { + u16 *buf = buffer; + do { + u16 x = inw(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + if (count) { + u32 *buf = buffer; + do { + u32 x = inl(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u8 *buf = buffer; + do { + outb(*buf++, addr); + } while (--count); + } +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u16 *buf = buffer; + do { + outw(*buf++, addr); + } while (--count); + } +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u32 *buf = buffer; + do { + outl(*buf++, addr); + } while (--count); + } +} + +#ifndef CONFIG_GENERIC_IOMAP +#define ioread8(addr) readb(addr) +#define ioread16(addr) readw(addr) +#define ioread32(addr) readl(addr) + +#define iowrite8(v, addr) writeb((v), (addr)) +#define iowrite16(v, addr) writew((v), (addr)) +#define iowrite32(v, addr) writel((v), (addr)) + +#define ioread8_rep(p, dst, count) \ + insb((unsigned long) (p), (dst), (count)) +#define ioread16_rep(p, dst, count) \ + insw((unsigned long) (p), (dst), (count)) +#define ioread32_rep(p, dst, count) \ + insl((unsigned long) (p), (dst), (count)) + +#define iowrite8_rep(p, src, count) \ + outsb((unsigned long) (p), (src), (count)) +#define iowrite16_rep(p, src, count) \ + outsw((unsigned long) (p), (src), (count)) +#define iowrite32_rep(p, src, count) \ + outsl((unsigned long) (p), (src), (count)) +#endif /* CONFIG_GENERIC_IOMAP */ + + +#define IO_SPACE_LIMIT 0xffffffff + +#ifdef __KERNEL__ + +#include +#define __io_virt(x) ((void __force *) (x)) + +#ifndef CONFIG_GENERIC_IOMAP +/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ +struct pci_dev; +extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) +{ +} +#endif /* CONFIG_GENERIC_IOMAP */ + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void *address) +{ + return __pa((unsigned long)address); +} + +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size) +{ + return (void __iomem*) (unsigned long)offset; +} + +#define __ioremap(offset, size, flags) ioremap(offset, size) + +#ifndef ioremap_nocache +#define ioremap_nocache ioremap +#endif + +#ifndef ioremap_wc +#define ioremap_wc ioremap_nocache +#endif + +static inline void iounmap(void *addr) +{ +} + +#ifndef CONFIG_GENERIC_IOMAP +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *) port; +} + +static inline void ioport_unmap(void __iomem *p) +{ +} +#else /* CONFIG_GENERIC_IOMAP */ +extern void __iomem *ioport_map(unsigned long port, unsigned int nr); +extern void ioport_unmap(void __iomem *p); +#endif /* CONFIG_GENERIC_IOMAP */ + +#define xlate_dev_kmem_ptr(p) p +#define xlate_dev_mem_ptr(p) ((void *) (p)) + +#ifndef virt_to_bus +static inline unsigned long virt_to_bus(volatile void *address) +{ + return ((unsigned long) address); +} + +static inline void *bus_to_virt(unsigned long address) +{ + return (void *) address; +} +#endif + +#define memset_io(a, b, c) memset(__io_virt(a), (b), (c)) +#define memcpy_fromio(a, b, c) memcpy((a), __io_virt(b), (c)) +#define memcpy_toio(a, b, c) memcpy(__io_virt(a), (b), (c)) + +#endif /* __KERNEL__ */ + +#endif /* __ASM_GENERIC_IO_H */ -- cgit v1.2.3-70-g09d2 From 5c01b46bb6bb8f2662573c05c87b5d68fa25af89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:36 +0000 Subject: asm-generic: add generic NOMMU versions of some headers Memory management in generic is highly architecture specific, but on NOMMU architectures, it is mostly trivial, so just add a default implementation in asm-generic that applies to all NOMMU architectures. The two files cache.h and cacheflush.h can possibly also be used by architectures that have an MMU but never require flushing the cache or have cache lines larger than 32 bytes. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/cache.h | 12 +++++ include/asm-generic/cacheflush.h | 30 ++++++++++++ include/asm-generic/mmu.h | 15 ++++++ include/asm-generic/mmu_context.h | 45 ++++++++++++++++++ include/asm-generic/page.h | 99 +++++++++++++++++++++++++++++++++++++++ include/asm-generic/pgalloc.h | 12 +++++ include/asm-generic/segment.h | 9 ++++ include/asm-generic/tlbflush.h | 18 +++++++ 8 files changed, 240 insertions(+) create mode 100644 include/asm-generic/cache.h create mode 100644 include/asm-generic/cacheflush.h create mode 100644 include/asm-generic/mmu.h create mode 100644 include/asm-generic/mmu_context.h create mode 100644 include/asm-generic/page.h create mode 100644 include/asm-generic/pgalloc.h create mode 100644 include/asm-generic/segment.h create mode 100644 include/asm-generic/tlbflush.h (limited to 'include') diff --git a/include/asm-generic/cache.h b/include/asm-generic/cache.h new file mode 100644 index 00000000000..1bfcfe5c223 --- /dev/null +++ b/include/asm-generic/cache.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_CACHE_H +#define __ASM_GENERIC_CACHE_H +/* + * 32 bytes appears to be the most common cache line size, + * so make that the default here. Architectures with larger + * cache lines need to provide their own cache.h. + */ + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif /* __ASM_GENERIC_CACHE_H */ diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h new file mode 100644 index 00000000000..ba4ec39a113 --- /dev/null +++ b/include/asm-generic/cacheflush.h @@ -0,0 +1,30 @@ +#ifndef __ASM_CACHEFLUSH_H +#define __ASM_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include + +/* + * The cache doesn't need to be flushed when TLB entries change when + * the cache is mapped to physical memory, not virtual memory + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_CACHEFLUSH_H */ diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h new file mode 100644 index 00000000000..4f4aa56d6b5 --- /dev/null +++ b/include/asm-generic/mmu.h @@ -0,0 +1,15 @@ +#ifndef __ASM_GENERIC_MMU_H +#define __ASM_GENERIC_MMU_H + +/* + * This is the mmu.h header for nommu implementations. + * Architectures with an MMU need something more complex. + */ +#ifndef __ASSEMBLY__ +typedef struct { + struct vm_list_struct *vmlist; + unsigned long end_brk; +} mm_context_t; +#endif + +#endif /* __ASM_GENERIC_MMU_H */ diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h new file mode 100644 index 00000000000..a7eec910ba6 --- /dev/null +++ b/include/asm-generic/mmu_context.h @@ -0,0 +1,45 @@ +#ifndef __ASM_GENERIC_MMU_CONTEXT_H +#define __ASM_GENERIC_MMU_CONTEXT_H + +/* + * Generic hooks for NOMMU architectures, which do not need to do + * anything special here. + */ + +#include + +struct task_struct; +struct mm_struct; + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ +} + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + return 0; +} + +static inline void destroy_context(struct mm_struct *mm) +{ +} + +static inline void deactivate_mm(struct task_struct *task, + struct mm_struct *mm) +{ +} + +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ +} + +static inline void activate_mm(struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ +} + +#endif /* __ASM_GENERIC_MMU_CONTEXT_H */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h new file mode 100644 index 00000000000..75fec18cdc5 --- /dev/null +++ b/include/asm-generic/page.h @@ -0,0 +1,99 @@ +#ifndef __ASM_GENERIC_PAGE_H +#define __ASM_GENERIC_PAGE_H +/* + * Generic page.h implementation, for NOMMU architectures. + * This provides the dummy definitions for the memory management. + */ + +#ifdef CONFIG_MMU +#error need to prove a real asm/page.h +#endif + + +/* PAGE_SHIFT determines the page size */ + +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#include + +#ifndef __ASSEMBLY__ + +#define get_user_page(vaddr) __get_free_page(GFP_KERNEL) +#define free_user_page(page, addr) free_page(addr) + +#define clear_page(page) memset((page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { + unsigned long pte; +} pte_t; +typedef struct { + unsigned long pmd[16]; +} pmd_t; +typedef struct { + unsigned long pgd; +} pgd_t; +typedef struct { + unsigned long pgprot; +} pgprot_t; +typedef struct page *pgtable_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((&x)->pmd[0]) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +extern unsigned long memory_start; +extern unsigned long memory_end; + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_KERNEL_RAM_BASE_ADDRESS +#define PAGE_OFFSET (CONFIG_KERNEL_RAM_BASE_ADDRESS) +#else +#define PAGE_OFFSET (0) +#endif + +#ifndef __ASSEMBLY__ + +#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET)) +#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) + +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) + +#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) +#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) + +#ifndef page_to_phys +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +#endif + +#define pfn_valid(pfn) ((pfn) < max_mapnr) + +#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ + ((void *)(kaddr) < (void *)memory_end)) + +#endif /* __ASSEMBLY__ */ + +#include +#include + +#endif /* __ASM_GENERIC_PAGE_H */ diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h new file mode 100644 index 00000000000..9e429d08b1f --- /dev/null +++ b/include/asm-generic/pgalloc.h @@ -0,0 +1,12 @@ +#ifndef __ASM_GENERIC_PGALLOC_H +#define __ASM_GENERIC_PGALLOC_H +/* + * an empty file is enough for a nommu architecture + */ +#ifdef CONFIG_MMU +#error need to implement an architecture specific asm/pgalloc.h +#endif + +#define check_pgt_cache() do { } while (0) + +#endif /* __ASM_GENERIC_PGALLOC_H */ diff --git a/include/asm-generic/segment.h b/include/asm-generic/segment.h new file mode 100644 index 00000000000..5580eace622 --- /dev/null +++ b/include/asm-generic/segment.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_SEGMENT_H +#define __ASM_GENERIC_SEGMENT_H +/* + * Only here because we have some old header files that expect it... + * + * New architectures probably don't want to have their own version. + */ + +#endif /* __ASM_GENERIC_SEGMENT_H */ diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h new file mode 100644 index 00000000000..c7af037024c --- /dev/null +++ b/include/asm-generic/tlbflush.h @@ -0,0 +1,18 @@ +#ifndef __ASM_GENERIC_TLBFLUSH_H +#define __ASM_GENERIC_TLBFLUSH_H +/* + * This is a dummy tlbflush implementation that can be used on all + * nommu architectures. + * If you have an MMU, you need to write your own functions. + */ +#ifdef CONFIG_MMU +#error need to implement an architecture specific asm/tlbflush.h +#endif + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + BUG(); +} + + +#endif /* __ASM_GENERIC_TLBFLUSH_H */ -- cgit v1.2.3-70-g09d2 From eed417ddd52146f446595b5a7d8f21b1814b95b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:37 +0000 Subject: asm-generic: add a generic uaccess.h Based on discussions with Michal Simek and code from m68knommu and h8300, this version of uaccess.h should be usable by most architectures, by overriding some parts of it. Simple NOMMU architectures can use it out of the box, but a minimal __access_ok() should be added there as well. Cc: Michal Simek Signed-off-by: Arnd Bergmann --- include/asm-generic/uaccess.h | 325 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 include/asm-generic/uaccess.h (limited to 'include') diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h new file mode 100644 index 00000000000..6d8cab22e29 --- /dev/null +++ b/include/asm-generic/uaccess.h @@ -0,0 +1,325 @@ +#ifndef __ASM_GENERIC_UACCESS_H +#define __ASM_GENERIC_UACCESS_H + +/* + * User space memory access functions, these should work + * on a ny machine that has kernel and user data in the same + * address space, e.g. all NOMMU machines. + */ +#include +#include +#include + +#include + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#ifndef KERNEL_DS +#define KERNEL_DS MAKE_MM_SEG(~0UL) +#endif + +#ifndef USER_DS +#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) +#endif + +#ifndef get_fs +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) + +static inline void set_fs(mm_segment_t fs) +{ + current_thread_info()->addr_limit = fs; +} +#endif + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),(size)) + +/* + * The architecture should really override this if possible, at least + * doing a check on the get_fs() + */ +#ifndef __access_ok +static inline int __access_ok(unsigned long addr, unsigned long size) +{ + return 1; +} +#endif + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); + +/* + * architectures with an MMU should override these two + */ +#ifndef __copy_from_user +static inline __must_check long __copy_from_user(void *to, + const void __user * from, unsigned long n) +{ + if (__builtin_constant_p(n)) { + switch(n) { + case 1: + *(u8 *)to = *(u8 __force *)from; + return 0; + case 2: + *(u16 *)to = *(u16 __force *)from; + return 0; + case 4: + *(u32 *)to = *(u32 __force *)from; + return 0; +#ifdef CONFIG_64BIT + case 8: + *(u64 *)to = *(u64 __force *)from; + return 0; +#endif + default: + break; + } + } + + memcpy(to, (const void __force *)from, n); + return 0; +} +#endif + +#ifndef __copy_to_user +static inline __must_check long __copy_to_user(void __user *to, + const void *from, unsigned long n) +{ + if (__builtin_constant_p(n)) { + switch(n) { + case 1: + *(u8 __force *)to = *(u8 *)from; + return 0; + case 2: + *(u16 __force *)to = *(u16 *)from; + return 0; + case 4: + *(u32 __force *)to = *(u32 *)from; + return 0; +#ifdef CONFIG_64BIT + case 8: + *(u64 __force *)to = *(u64 *)from; + return 0; +#endif + default: + break; + } + } + + memcpy((void __force *)to, from, n); + return 0; +} +#endif + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * This version just falls back to copy_{from,to}_user, which should + * provide a fast-path for small values. + */ +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __pu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof (*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + __pu_err = __put_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + break; \ + default: \ + __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) + +#define put_user(x, ptr) \ +({ \ + might_sleep(); \ + __access_ok(ptr, sizeof (*ptr)) ? \ + __put_user(x, ptr) : \ + -EFAULT; \ +}) + +static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +{ + size = __copy_to_user(ptr, x, size); + return size ? -EFAULT : size; +} + +extern int __put_user_bad(void) __attribute__((noreturn)); + +#define __get_user(x, ptr) \ +({ \ + int __gu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: { \ + unsigned char __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 2: { \ + unsigned short __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 4: { \ + unsigned int __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 8: { \ + unsigned long long __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + default: \ + __get_user_bad(); \ + break; \ + } \ + __gu_err; \ +}) + +#define get_user(x, ptr) \ +({ \ + might_sleep(); \ + __access_ok(ptr, sizeof (*ptr)) ? \ + __get_user(x, ptr) : \ + -EFAULT; \ +}) + +static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +{ + size = __copy_from_user(x, ptr, size); + return size ? -EFAULT : size; +} + +extern int __get_user_bad(void) __attribute__((noreturn)); + +#ifndef __copy_from_user_inatomic +#define __copy_from_user_inatomic __copy_from_user +#endif + +#ifndef __copy_to_user_inatomic +#define __copy_to_user_inatomic __copy_to_user +#endif + +static inline long copy_from_user(void *to, + const void __user * from, unsigned long n) +{ + might_sleep(); + if (__access_ok(from, n)) + return __copy_from_user(to, from, n); + else + return n; +} + +static inline long copy_to_user(void __user *to, + const void *from, unsigned long n) +{ + might_sleep(); + if (__access_ok(to, n)) + return __copy_to_user(to, from, n); + else + return n; +} + +/* + * Copy a null terminated string from userspace. + */ +#ifndef __strncpy_from_user +static inline long +__strncpy_from_user(char *dst, const char __user *src, long count) +{ + char *tmp; + strncpy(dst, (const char __force *)src, count); + for (tmp = dst; *tmp && count > 0; tmp++, count--) + ; + return (tmp - dst); +} +#endif + +static inline long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + if (!__access_ok(src, 1)) + return -EFAULT; + return __strncpy_from_user(dst, src, count); +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ +#ifndef strnlen_user +static inline long strnlen_user(const char __user *src, long n) +{ + return strlen((void * __force)src) + 1; +} +#endif + +static inline long strlen_user(const char __user *src) +{ + return strnlen_user(src, 32767); +} + +/* + * Zero Userspace + */ +#ifndef __clear_user +static inline __must_check unsigned long +__clear_user(void __user *to, unsigned long n) +{ + memset((void __force *)to, 0, n); + return 0; +} +#endif + +static inline __must_check unsigned long +clear_user(void __user *to, unsigned long n) +{ + might_sleep(); + if (!__access_ok(to, n)) + return n; + + return __clear_user(to, n); +} + +#endif /* __ASM_GENERIC_UACCESS_H */ -- cgit v1.2.3-70-g09d2 From 26a28fa4fea5b8c65713aa50c124f76a88c7924d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:38 +0000 Subject: add generic lib/checksum.c Add a generic (unoptimized) implementation of checksum.c in pure C for use by all architectures that cannot be bother with implementing their own version. Based on microblaze code by Michal Simek Cc: Michal Simek Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- include/asm-generic/checksum.h | 79 +++++++++++++++++ lib/Makefile | 2 + lib/checksum.c | 193 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 include/asm-generic/checksum.h create mode 100644 lib/checksum.c (limited to 'include') diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h new file mode 100644 index 00000000000..4647c762d97 --- /dev/null +++ b/include/asm-generic/checksum.h @@ -0,0 +1,79 @@ +#ifndef __ASM_GENERIC_CHECKSUM_H +#define __ASM_GENERIC_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum); + +/* + * the same as csum_partial_copy, but copies from user space. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *csum_err); + +#define csum_partial_copy_nocheck(src, dst, len, sum) \ + csum_partial_copy((src), (dst), (len), (sum)) + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * Fold a partial checksum + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#ifndef csum_tcpudp_nofold +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +extern __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum); +#endif + +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +#endif /* __ASM_GENERIC_CHECKSUM_H */ diff --git a/lib/Makefile b/lib/Makefile index 33a40e40e3e..b6869e12e74 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -92,6 +92,8 @@ obj-$(CONFIG_NLATTR) += nlattr.o obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o +obj-$(CONFIG_GENERIC_CSUM) += checksum.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/checksum.c b/lib/checksum.c new file mode 100644 index 00000000000..12e5a1c91cd --- /dev/null +++ b/lib/checksum.c @@ -0,0 +1,193 @@ +/* + * + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, + * Arnt Gulbrandsen, + * Tom May, + * Andreas Schwab, + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: + * Fixed some nasty bugs, causing some horrible crashes. + * A: At some points, the sum (%0) was used as + * length-counter instead of the length counter + * (%1). Thanks to Roman Hodek for pointing this out. + * B: GCC seems to mess up if one uses too many + * data-registers to hold input values and one tries to + * specify d0 and d1 as scratch registers. Letting gcc + * choose these registers itself solves the problem. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access + kills, so most of the assembly has to go. */ + +#include +#include + +#include + +static inline unsigned short from32to16(unsigned long x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static unsigned int do_csum(const unsigned char *buff, int len) +{ + int odd, count; + unsigned long result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 4; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += (*buff << 8); + result = from32to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph, ihl*4); +} +EXPORT_SYMBOL(ip_fast_csum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum wsum) +{ + unsigned int sum = (__force unsigned int)wsum; + unsigned int result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += sum; + if (sum > result) + result += 1; + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~do_csum(buff, len); +} +EXPORT_SYMBOL(ip_compute_csum); + +/* + * copy from fs while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *csum_err) +{ + int missing; + + missing = __copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *csum_err = -EFAULT; + } else + *csum_err = 0; + + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy_from_user); + +/* + * copy from ds while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy(const void *src, void *dst, int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy); + +#ifndef csum_tcpudp_nofold +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN + s += proto + len; +#else + s += (proto + len) << 8; +#endif + s += (s >> 32); + return (__force __wsum)s; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); +#endif -- cgit v1.2.3-70-g09d2 From 04846b5b8112e53b588038349b3e92b8485c1807 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 20 Apr 2009 10:54:52 +0900 Subject: PCI MSI: Remove unused/obsolete macros and definitions Impact: cleanup, spec compliance This patch does: - Remove unused msi/msix_enable/disable macros. User should use msi/msix_set_enable() functions instead. - Remove unused msix_mask/unmask/pending macros. These macros are useless because they are not based on any of the PCI Local Bus Specifications properly. It seems that they were written based on a draft of PCI spec, and that the draft was the MSI-X ECN that underwent membership review in September 2002. (* In the draft, the size of a entry in MSI-X table was 64bit, containing 32bit message data and DWORD aligned lower address plus a pending bit and a mask bit.(30+1+1bit) The higher address was placed in MSI-X capability structure and shared by all entries.) - Remove PCI_MSIX_FLAGS_BITMASK. This definition also come from the draft ECN. Signed-off-by: Hidetoshi Seto Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.h | 8 +------- include/linux/pci_regs.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 71f4df2ef65..4fed5926195 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -19,18 +19,12 @@ ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 ) #define msi_mask_bits_reg(base, is64bit) \ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) -#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) -#define msix_enable(control) control |= PCI_MSIX_FLAGS_ENABLE -#define msix_disable(control) control &= ~PCI_MSIX_FLAGS_ENABLE #define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) -#define multi_msix_capable msix_table_size -#define msix_unmask(address) (address & ~PCI_MSIX_FLAGS_BITMASK) -#define msix_mask(address) (address | PCI_MSIX_FLAGS_BITMASK) -#define msix_is_pending(address) (address & PCI_MSIX_FLAGS_PENDMASK) +#define multi_msix_capable(control) msix_table_size((control)) #endif /* MSI_H */ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 616bf8b3c8b..dcba7668e0c 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -304,7 +304,6 @@ #define PCI_MSIX_FLAGS_ENABLE (1 << 15) #define PCI_MSIX_FLAGS_MASKALL (1 << 14) #define PCI_MSIX_FLAGS_BIRMASK (7 << 0) -#define PCI_MSIX_FLAGS_BITMASK (1 << 0) /* CompactPCI Hotswap Register */ -- cgit v1.2.3-70-g09d2 From 67b5db6502ddd27d65dea43bf036abbd82d0dfc9 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 20 Apr 2009 10:54:59 +0900 Subject: PCI MSI: Define PCI_MSI_MASK_32/64 Impact: cleanup, improve readability Define PCI_MSI_MASK_32/64 for 32/64bit devices, instead of using implicit offset (-4), "PCI_MSI_MASK_BIT - 4" and "PCI_MSI_MASK_BIT". Signed-off-by: Hidetoshi Seto Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 2 +- drivers/pci/msi.h | 6 +++--- include/linux/pci_regs.h | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 362773247fb..7ffac27d5d4 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -381,7 +381,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); + entry->mask_pos = msi_mask_reg(pos, entry->msi_attrib.is_64); /* All MSIs are unmasked by default, Mask them all */ if (entry->msi_attrib.maskbit) pci_read_config_dword(dev, entry->mask_pos, &entry->masked); diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 4fed5926195..a0662842550 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -16,9 +16,9 @@ #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) #define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI) #define msi_data_reg(base, is64bit) \ - ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 ) -#define msi_mask_bits_reg(base, is64bit) \ - ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) + (base + ((is64bit == 1) ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32)) +#define msi_mask_reg(base, is64bit) \ + (base + ((is64bit == 1) ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32)) #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index dcba7668e0c..83b02f5a25b 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -295,8 +295,9 @@ #define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ #define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ #define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ #define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ -#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ +#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ /* MSI-X registers (these are at offset PCI_MSIX_FLAGS) */ #define PCI_MSIX_FLAGS 2 -- cgit v1.2.3-70-g09d2 From 1f82de10d6b1d845155363c895c552e61b36b51a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 23 Apr 2009 20:48:32 -0700 Subject: PCI/x86: don't assume prefetchable ranges are 64bit We should not assign 64bit ranges to PCI devices that only take 32bit prefetchable addresses. Try to set IORESOURCE_MEM_64 in 64bit resource of pci_device/pci_bridge and make the bus resource only have that bit set when all devices under it support 64bit prefetchable memory. Use that flag to allocate resources from that range. Reported-by: Yannick Reviewed-by: Ivan Kokshaysky Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci.h | 1 + drivers/pci/bus.c | 7 ++++++- drivers/pci/probe.c | 9 ++++++-- drivers/pci/setup-bus.c | 52 +++++++++++++++++++++++++++++++++++++--------- include/linux/ioport.h | 2 ++ include/linux/pci.h | 4 ++++ 6 files changed, 62 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b51a1e8b0ba..927958d13c1 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void); /* generic pci stuff */ #include +#define PCIBIOS_MAX_MEM_32 0xffffffff #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 97a8194063b..40af27f3104 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -41,9 +41,14 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, void *alignf_data) { int i, ret = -ENOMEM; + resource_size_t max = -1; type_mask |= IORESOURCE_IO | IORESOURCE_MEM; + /* don't allocate too high if the pref mem doesn't support 64bit*/ + if (!(res->flags & IORESOURCE_MEM_64)) + max = PCIBIOS_MAX_MEM_32; + for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) { struct resource *r = bus->resource[i]; if (!r) @@ -62,7 +67,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, /* Ok, try it out.. */ ret = allocate_resource(r, res, size, r->start ? : min, - -1, align, + max, align, alignf, alignf_data); if (ret == 0) break; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f1ae2475fff..b962326e3d9 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -193,7 +193,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; if (type == pci_bar_io) { l &= PCI_BASE_ADDRESS_IO_MASK; - mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff; + mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT; } else { l &= PCI_BASE_ADDRESS_MEM_MASK; mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; @@ -237,6 +237,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, dev_printk(KERN_DEBUG, &dev->dev, "reg %x 64bit mmio: %pR\n", pos, res); } + + res->flags |= IORESOURCE_MEM_64; } else { sz = pci_size(l, sz, mask); @@ -362,7 +364,10 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) } } if (base <= limit) { - res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; + res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | + IORESOURCE_MEM | IORESOURCE_PREFETCH; + if (res->flags & PCI_PREF_RANGE_TYPE_64) + res->flags |= IORESOURCE_MEM_64; res->start = base; res->end = limit + 0xfffff; dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n", diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index a00f85471b6..e1c360a5b0d 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -143,6 +143,7 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_dev *bridge = bus->self; struct pci_bus_region region; u32 l, bu, lu, io_upper16; + int pref_mem64; if (pci_is_enabled(bridge)) return; @@ -198,16 +199,22 @@ static void pci_setup_bridge(struct pci_bus *bus) pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0); /* Set up PREF base/limit. */ + pref_mem64 = 0; bu = lu = 0; pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); if (bus->resource[2]->flags & IORESOURCE_PREFETCH) { + int width = 8; l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - bu = upper_32_bits(region.start); - lu = upper_32_bits(region.end); - dev_info(&bridge->dev, " PREFETCH window: %#016llx-%#016llx\n", - (unsigned long long)region.start, - (unsigned long long)region.end); + if (bus->resource[2]->flags & IORESOURCE_MEM_64) { + pref_mem64 = 1; + bu = upper_32_bits(region.start); + lu = upper_32_bits(region.end); + width = 16; + } + dev_info(&bridge->dev, " PREFETCH window: %#0*llx-%#0*llx\n", + width, (unsigned long long)region.start, + width, (unsigned long long)region.end); } else { l = 0x0000fff0; @@ -215,9 +222,11 @@ static void pci_setup_bridge(struct pci_bus *bus) } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); - /* Set the upper 32 bits of PREF base & limit. */ - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); - pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); + if (pref_mem64) { + /* Set the upper 32 bits of PREF base & limit. */ + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); + pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); + } pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } @@ -255,8 +264,25 @@ static void pci_bridge_check_ranges(struct pci_bus *bus) pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); } - if (pmem) + if (pmem) { b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; + if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) + b_res[2].flags |= IORESOURCE_MEM_64; + } + + /* double check if bridge does support 64 bit pref */ + if (b_res[2].flags & IORESOURCE_MEM_64) { + u32 mem_base_hi, tmp; + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, + &mem_base_hi); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + 0xffffffff); + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); + if (!tmp) + b_res[2].flags &= ~IORESOURCE_MEM_64; + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + mem_base_hi); + } } /* Helper function for sizing routines: find first available @@ -336,6 +362,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */ int order, max_order; struct resource *b_res = find_free_bus_resource(bus, type); + unsigned int mem64_mask = 0; if (!b_res) return 0; @@ -344,9 +371,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long max_order = 0; size = 0; + mem64_mask = b_res->flags & IORESOURCE_MEM_64; + b_res->flags &= ~IORESOURCE_MEM_64; + list_for_each_entry(dev, &bus->devices, bus_list) { int i; - + for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r = &dev->resource[i]; resource_size_t r_size; @@ -372,6 +402,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long aligns[order] += align; if (order > max_order) max_order = order; + mem64_mask &= r->flags & IORESOURCE_MEM_64; } } @@ -396,6 +427,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long b_res->start = min_align; b_res->end = size + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN; + b_res->flags |= mem64_mask; return 1; } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 32e4b2f7229..786e7b8cece 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,8 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_MEM_64 0x00100000 + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 diff --git a/include/linux/pci.h b/include/linux/pci.h index 72698d89e76..6dfa47d25ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1097,6 +1097,10 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, #include +#ifndef PCIBIOS_MAX_MEM_32 +#define PCIBIOS_MAX_MEM_32 (-1) +#endif + /* these helpers provide future and backwards compatibility * for accessing popular PCI BAR info */ #define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start) -- cgit v1.2.3-70-g09d2 From 3b073eda9557975a87a27b08a46a545fe8da66fb Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 31 Mar 2009 09:24:22 -0600 Subject: PCI: remove deprecated pci_find_slot() interface The last in-tree caller of pci_find_slot has been converted, so let's get rid of this deprecated interface. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/search.c | 30 ------------------------------ include/linux/pci.h | 8 -------- 2 files changed, 38 deletions(-) (limited to 'include') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 710d4ea6956..650bc0a538d 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -114,36 +114,6 @@ pci_find_next_bus(const struct pci_bus *from) } #ifdef CONFIG_PCI_LEGACY -/** - * pci_find_slot - locate PCI device from a given PCI slot - * @bus: number of PCI bus on which desired PCI device resides - * @devfn: encodes number of PCI slot in which the desired PCI - * device resides and the logical device number within that slot - * in case of multi-function devices. - * - * Given a PCI bus and slot/function number, the desired PCI device - * is located in system global list of PCI devices. If the device - * is found, a pointer to its data structure is returned. If no - * device is found, %NULL is returned. - * - * NOTE: Do not use this function any more; use pci_get_slot() instead, as - * the PCI device returned by this function can disappear at any moment in - * time. - */ -struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn) -{ - struct pci_dev *dev = NULL; - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (dev->bus->number == bus && dev->devfn == devfn) { - pci_dev_put(dev); - return dev; - } - } - return NULL; -} -EXPORT_SYMBOL(pci_find_slot); - /** * pci_find_device - begin or continue searching for a PCI device by vendor/device id * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids diff --git a/include/linux/pci.h b/include/linux/pci.h index 6dfa47d25ba..19ee92c53ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -599,8 +599,6 @@ extern void pci_sort_breadthfirst(void); struct pci_dev __deprecated *pci_find_device(unsigned int vendor, unsigned int device, struct pci_dev *from); -struct pci_dev __deprecated *pci_find_slot(unsigned int bus, - unsigned int devfn); #endif /* CONFIG_PCI_LEGACY */ enum pci_lost_interrupt_reason { @@ -936,12 +934,6 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor, return NULL; } -static inline struct pci_dev *pci_find_slot(unsigned int bus, - unsigned int devfn) -{ - return NULL; -} - static inline struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from) -- cgit v1.2.3-70-g09d2 From 43c16408842b0eeb367c23a6fa540ce69f99e347 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Wed, 22 Apr 2009 16:52:09 -0600 Subject: PCI: Add support for turning PCIe ECRC on or off Adds support for PCI Express transaction layer end-to-end CRC checking (ECRC). This patch will enable/disable ECRC checking by setting/clearing the ECRC Check Enable and/or ECRC Generation Enable bits for devices that support ECRC. The ECRC setting is controlled by the "pci=ecrc=" command-line option. If this option is not set or is set to 'bios", the enable and generation bits are left in whatever state that firmware/BIOS set them to. The "off" setting turns them off, and the "on" option turns them on (if the device supports it). Turning ECRC on or off can be a data integrity versus performance tradeoff. In theory, turning it on will catch more data errors, turning it off means possibly better performance since CRC does not need to be calculated by the PCIe hardware and packet sizes are reduced. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 6 ++ drivers/pci/pci.c | 2 + drivers/pci/pcie/aer/Kconfig | 13 ++++ drivers/pci/pcie/aer/Makefile | 2 + drivers/pci/pcie/aer/aerdrv_core.c | 16 +++-- drivers/pci/pcie/aer/ecrc.c | 131 ++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 11 +++ 7 files changed, 174 insertions(+), 7 deletions(-) create mode 100644 drivers/pci/pcie/aer/ecrc.c (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7bdaf508040..395d1a013eb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1824,6 +1824,12 @@ and is between 256 and 4096 characters. It is defined in the file PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource windows need to be expanded. + ecrc= Enable/disable PCIe ECRC (transaction layer + end-to-end CRC checking). + bios: Use BIOS/firmware settings. This is the + the default. + off: Turn ECRC off + on: Turn ECRC on. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 761557688b1..56fb18d2cb5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2588,6 +2588,8 @@ static int __init pci_setup(char *str) } else if (!strncmp(str, "resource_alignment=", 19)) { pci_set_resource_alignment_param(str + 19, strlen(str + 19)); + } else if (!strncmp(str, "ecrc=", 5)) { + pcie_ecrc_get_policy(str + 5); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig index c3bde588aa1..db4cb950933 100644 --- a/drivers/pci/pcie/aer/Kconfig +++ b/drivers/pci/pcie/aer/Kconfig @@ -10,3 +10,16 @@ config PCIEAER This enables PCI Express Root Port Advanced Error Reporting (AER) driver support. Error reporting messages sent to Root Port will be handled by PCI Express AER driver. + + +# +# PCI Express ECRC +# +config PCIE_ECRC + bool "PCI Express ECRC settings control" + depends on PCIEAER + help + Used to override firmware/bios settings for PCI Express ECRC + (transaction layer end-to-end CRC checking). + + When in doubt, say N. diff --git a/drivers/pci/pcie/aer/Makefile b/drivers/pci/pcie/aer/Makefile index 8da3bd8455a..7f93411c56e 100644 --- a/drivers/pci/pcie/aer/Makefile +++ b/drivers/pci/pcie/aer/Makefile @@ -4,6 +4,8 @@ obj-$(CONFIG_PCIEAER) += aerdriver.o +obj-$(CONFIG_PCIE_ECRC) += ecrc.o + aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o aerdriver-$(CONFIG_ACPI) += aerdrv_acpi.o diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 307452f3003..dd3829e68e3 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -113,15 +113,17 @@ static void set_device_error_reporting(struct pci_dev *dev, void *data) { bool enable = *((bool *)data); - if (dev->pcie_type != PCIE_RC_PORT && - dev->pcie_type != PCIE_SW_UPSTREAM_PORT && - dev->pcie_type != PCIE_SW_DOWNSTREAM_PORT) - return; + if (dev->pcie_type == PCIE_RC_PORT || + dev->pcie_type == PCIE_SW_UPSTREAM_PORT || + dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) { + if (enable) + pci_enable_pcie_error_reporting(dev); + else + pci_disable_pcie_error_reporting(dev); + } if (enable) - pci_enable_pcie_error_reporting(dev); - else - pci_disable_pcie_error_reporting(dev); + pcie_set_ecrc_checking(dev); } /** diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c new file mode 100644 index 00000000000..ece97df4df6 --- /dev/null +++ b/drivers/pci/pcie/aer/ecrc.c @@ -0,0 +1,131 @@ +/* + * Enables/disables PCIe ECRC checking. + * + * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. + * Andrew Patterson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include "../../pci.h" + +#define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ +#define ECRC_POLICY_OFF 1 /* ECRC off for performance */ +#define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ + +static int ecrc_policy = ECRC_POLICY_DEFAULT; + +static const char *ecrc_policy_str[] = { + [ECRC_POLICY_DEFAULT] = "bios", + [ECRC_POLICY_OFF] = "off", + [ECRC_POLICY_ON] = "on" +}; + +/** + * enable_ercr_checking - enable PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int enable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + if (reg32 & PCI_ERR_CAP_ECRC_GENC) + reg32 |= PCI_ERR_CAP_ECRC_GENE; + if (reg32 & PCI_ERR_CAP_ECRC_CHKC) + reg32 |= PCI_ERR_CAP_ECRC_CHKE; + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * disable_ercr_checking - disables PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int disable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * @dev: the PCI device + */ +void pcie_set_ecrc_checking(struct pci_dev *dev) +{ + switch (ecrc_policy) { + case ECRC_POLICY_DEFAULT: + return; + case ECRC_POLICY_OFF: + disable_ecrc_checking(dev); + break; + case ECRC_POLICY_ON: + enable_ecrc_checking(dev);; + break; + default: + return; + } +} + +/** + * pcie_ecrc_get_policy - parse kernel command-line ecrc option + */ +void pcie_ecrc_get_policy(char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++) + if (!strncmp(str, ecrc_policy_str[i], + strlen(ecrc_policy_str[i]))) + break; + if (i >= ARRAY_SIZE(ecrc_policy_str)) + return; + + ecrc_policy = i; +} diff --git a/include/linux/pci.h b/include/linux/pci.h index 19ee92c53ef..ec03b90d351 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -878,6 +878,17 @@ static inline int pcie_aspm_enabled(void) extern int pcie_aspm_enabled(void); #endif +#ifndef CONFIG_PCIE_ECRC +static inline void pcie_set_ecrc_checking(struct pci_dev *dev) +{ + return; +} +static inline void pcie_ecrc_get_policy(char *str) {}; +#else +extern void pcie_set_ecrc_checking(struct pci_dev *dev); +extern void pcie_ecrc_get_policy(char *str); +#endif + #define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) #ifdef CONFIG_HT_IRQ -- cgit v1.2.3-70-g09d2 From 73422811d290c628b4ddbf6830e5cd6fa42e84f1 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 10 May 2009 16:05:39 -0400 Subject: reiserfs: allow exposing privroot w/ xattrs enabled This patch adds an -oexpose_privroot option to allow access to the privroot. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 10 ++++------ fs/reiserfs/super.c | 1 + fs/reiserfs/xattr.c | 3 ++- include/linux/reiserfs_fs_sb.h | 2 ++ 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 45ee3d357c7..6d2668fdc38 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, static inline bool is_privroot_deh(struct dentry *dir, struct reiserfs_de_head *deh) { - int ret = 0; -#ifdef CONFIG_REISERFS_FS_XATTR struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - ret = (dir == dir->d_parent && privroot->d_inode && - deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); -#endif - return ret; + if (reiserfs_expose_privroot(dir->d_sb)) + return 0; + return (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); } int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3567fb9e3fb..9dbdcfb5d31 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -898,6 +898,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin {"conv",.setmask = 1 << REISERFS_CONVERT}, {"attrs",.setmask = 1 << REISERFS_ATTRS}, {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, + {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, #ifdef CONFIG_REISERFS_FS_XATTR {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8e7deb0e696..f3d47d85684 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - s->s_root->d_op = &xattr_lookup_poison_ops; + if (!reiserfs_expose_privroot(s)) + s->s_root->d_op = &xattr_lookup_poison_ops; if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6473650c28f..dab68bbed67 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -453,6 +453,7 @@ enum reiserfs_mount_options { REISERFS_ATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, + REISERFS_EXPOSE_PRIVROOT, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, @@ -490,6 +491,7 @@ enum reiserfs_mount_options { #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) +#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) -- cgit v1.2.3-70-g09d2 From 2a737871108de9ba8930f7650d549f1383767f8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2009 11:49:53 -0400 Subject: Cache root in nameidata New field: nd->root. When pathname resolution wants to know the root, check if nd->root.mnt is non-NULL; use nd->root if it is, otherwise copy current->fs->root there. After path_walk() is finished, we check if we'd got a cached value in nd->root and drop it. Before calling path_walk() we should either set nd->root.mnt to NULL *or* copy (and pin down) some path to nd->root. In the latter case we won't be looking at current->fs->root at all. Signed-off-by: Al Viro --- fs/namei.c | 53 +++++++++++++++++++++++++++++++++------------------ include/linux/namei.h | 1 + 2 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 895733efc6b..88baaf2b916 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd return result; } +static __always_inline void set_root(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + read_lock(&fs->lock); + nd->root = fs->root; + path_get(&nd->root); + read_unlock(&fs->lock); + } +} + static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { int res = 0; @@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { - struct fs_struct *fs = current->fs; - + set_root(nd); path_put(&nd->path); - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + nd->path = nd->root; + path_get(&nd->root); } res = link_path_walk(link, nd); @@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) static __always_inline void follow_dotdot(struct nameidata *nd) { - struct fs_struct *fs = current->fs; + set_root(nd); while(1) { struct vfsmount *parent; struct dentry *old = nd->path.dentry; - read_lock(&fs->lock); - if (nd->path.dentry == fs->root.dentry && - nd->path.mnt == fs->root.mnt) { - read_unlock(&fs->lock); + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { break; } - read_unlock(&fs->lock); spin_lock(&dcache_lock); if (nd->path.dentry != nd->path.mnt->mnt_root) { nd->path.dentry = dget(nd->path.dentry->d_parent); @@ -1022,18 +1026,18 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei int retval = 0; int fput_needed; struct file *file; - struct fs_struct *fs = current->fs; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; + nd->root.mnt = NULL; if (*name=='/') { - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + set_root(nd); + nd->path = nd->root; + path_get(&nd->root); } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; read_lock(&fs->lock); nd->path = fs->pwd; path_get(&fs->pwd); @@ -1079,6 +1083,10 @@ static int do_path_lookup(int dfd, const char *name, if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } return retval; } @@ -1115,6 +1123,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->last_type = LAST_ROOT; nd->flags = flags; nd->depth = 0; + nd->root.mnt = NULL; nd->path.dentry = dentry; nd->path.mnt = mnt; @@ -1125,8 +1134,12 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); - return retval; + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } + return retval; } /** @@ -1817,6 +1830,8 @@ exit: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); exit_parent: + if (nd.root.mnt) + path_put(&nd.root); path_put(&nd.path); return ERR_PTR(error); diff --git a/include/linux/namei.h b/include/linux/namei.h index 518098fe63a..325dd3ad39a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 }; struct nameidata { struct path path; struct qstr last; + struct path root; unsigned int flags; int last_type; unsigned depth; -- cgit v1.2.3-70-g09d2 From 91c9fa8f75877c0c1e455c23e8f8206c91c8f77f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 02:42:05 -0400 Subject: switch rqst_exp_get_by_name() Signed-off-by: Al Viro --- fs/nfsd/export.c | 15 ++++++--------- fs/nfsd/vfs.c | 32 ++++++++++++++++---------------- include/linux/nfsd/export.h | 3 +-- 3 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5149dabde55..84f5e5cb086 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1240,18 +1240,15 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) * use exp_get_by_name() or exp_find(). */ struct svc_export * -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); - struct path path = {.mnt = mnt, .dentry = dentry}; if (rqstp->rq_client == NULL) goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, &path, - &rqstp->rq_chandle); + exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1263,8 +1260,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, &path, - &rqstp->rq_chandle); + gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1307,9 +1303,10 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, struct dentry *dentry) { struct svc_export *exp; + struct path path = {.mnt = mnt, .dentry = dentry}; dget(dentry); - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); + exp = rqst_exp_get_by_name(rqstp, &path); while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { struct dentry *parent; @@ -1317,7 +1314,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, parent = dget_parent(dentry); dput(dentry); dentry = parent; - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); + exp = rqst_exp_get_by_name(rqstp, &path); } dput(dentry); return exp; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bd584bcf1d9..d84c4eaa526 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -101,36 +101,36 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - struct dentry *mounts = dget(dentry); + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dentry)}; int err = 0; - while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + while (follow_down(&path.mnt, &path.dentry) && + d_mountpoint(path.dentry)) + ; - exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); + exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { if (PTR_ERR(exp2) != -ENOENT) err = PTR_ERR(exp2); - dput(mounts); - mntput(mnt); + path_put(&path); goto out; } if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ /* - * This is subtle: dentry is *not* under mnt at this point. - * The only reason we are safe is that original mnt is pinned - * down by exp, so we should dput before putting exp. + * This is subtle: path.dentry is *not* on path.mnt + * at this point. The only reason we are safe is that + * original mnt is pinned down by exp, so we should + * put path *before* putting exp */ - dput(dentry); - *dpp = mounts; - exp_put(exp); + *dpp = path.dentry; + path.dentry = dentry; *expp = exp2; - } else { - exp_put(exp2); - dput(mounts); + exp2 = exp; } - mntput(mnt); + path_put(&path); + exp_put(exp2); out: return err; } diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcd0201589f..98f6fd584d5 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -125,8 +125,7 @@ void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct vfsmount *, - struct dentry *); + struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, struct vfsmount *mnt, struct dentry *dentry); -- cgit v1.2.3-70-g09d2 From e64c390ca0b60fd2119331ef1fa888d7ea27e424 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:00:46 -0400 Subject: switch rqst_exp_parent() Signed-off-by: Al Viro --- fs/nfsd/export.c | 25 ++++++++++--------------- fs/nfsd/vfs.c | 23 ++++++++++++----------- include/linux/nfsd/export.h | 3 +-- 3 files changed, 23 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 84f5e5cb086..8b1f8efb469 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1299,24 +1299,19 @@ gss: } struct svc_export * -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) { - struct svc_export *exp; - struct path path = {.mnt = mnt, .dentry = dentry}; - - dget(dentry); - exp = rqst_exp_get_by_name(rqstp, &path); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = rqst_exp_get_by_name(rqstp, &path); + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d84c4eaa526..9f1ea3127f5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -169,28 +169,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, /* checking mountpoint crossing is very different when stepping up */ struct svc_export *exp2 = NULL; struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - dentry = dget(dparent); - while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dparent)}; + + while (path.dentry == path.mnt->mnt_root && + follow_up(&path.mnt, &path.dentry)) ; - dp = dget_parent(dentry); - dput(dentry); - dentry = dp; + dp = dget_parent(path.dentry); + dput(path.dentry); + path.dentry = dp; - exp2 = rqst_exp_parent(rqstp, mnt, dentry); + exp2 = rqst_exp_parent(rqstp, &path); if (PTR_ERR(exp2) == -ENOENT) { - dput(dentry); dentry = dget(dparent); } else if (IS_ERR(exp2)) { host_err = PTR_ERR(exp2); - dput(dentry); - mntput(mnt); + path_put(&path); goto out_nfserr; } else { + dentry = dget(path.dentry); exp_put(exp); exp = exp2; } - mntput(mnt); + path_put(&path); } } else { fh_lock(fhp); diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 98f6fd584d5..a6d9ef2bb34 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -127,8 +127,7 @@ void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct vfsmount *mnt, - struct dentry *dentry); + struct path *); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -- cgit v1.2.3-70-g09d2 From bab77ebf51e3902f608ecf08c9d34a0a52ac35a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:26:48 -0400 Subject: switch follow_up() to struct path Signed-off-by: Al Viro --- fs/autofs4/dev-ioctl.c | 2 +- fs/namei.c | 16 ++++++++-------- fs/nfsd/vfs.c | 2 +- include/linux/namei.h | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index f71dac9986f..670407576b2 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname, err = 0; } } - if (!follow_up(&path.mnt, &path.dentry)) + if (!follow_up(&path)) break; } path_put(&path); diff --git a/fs/namei.c b/fs/namei.c index 4379ef98970..8c1f48ae68e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -675,23 +675,23 @@ loop: return err; } -int follow_up(struct vfsmount **mnt, struct dentry **dentry) +int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; spin_lock(&vfsmount_lock); - parent=(*mnt)->mnt_parent; - if (parent == *mnt) { + parent = path->mnt->mnt_parent; + if (parent == path->mnt) { spin_unlock(&vfsmount_lock); return 0; } mntget(parent); - mountpoint=dget((*mnt)->mnt_mountpoint); + mountpoint = dget(path->mnt->mnt_mountpoint); spin_unlock(&vfsmount_lock); - dput(*dentry); - *dentry = mountpoint; - mntput(*mnt); - *mnt = parent; + dput(path->dentry); + path->dentry = mountpoint; + mntput(path->mnt); + path->mnt = parent; return 1; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9f1ea3127f5..7b2b3f77532 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -173,7 +173,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, .dentry = dget(dparent)}; while (path.dentry == path.mnt->mnt_root && - follow_up(&path.mnt, &path.dentry)) + follow_up(&path)) ; dp = dget_parent(path.dentry); dput(path.dentry); diff --git a/include/linux/namei.h b/include/linux/namei.h index 325dd3ad39a..9cd5a717be3 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -79,7 +79,7 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); extern int follow_down(struct vfsmount **, struct dentry **); -extern int follow_up(struct vfsmount **, struct dentry **); +extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3-70-g09d2 From 589ff870ed60a9ebdd5ec99ec3f5afe1282fe151 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 03:28:19 -0400 Subject: Switch collect_mounts() to struct path Signed-off-by: Al Viro --- fs/namespace.c | 4 ++-- include/linux/fs.h | 2 +- kernel/audit_tree.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 88a904d5aa2..c85962206aa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1253,11 +1253,11 @@ Enomem: return NULL; } -struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *collect_mounts(struct path *path) { struct vfsmount *tree; down_write(&namespace_sem); - tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); + tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); return tree; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 323b5ce474c..03fb2102b8f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1800,7 +1800,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); +extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 6e7351739a8..1f6396d7668 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -568,7 +568,7 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(path.mnt, path.dentry); + root_mnt = collect_mounts(&path); path_put(&path); if (!root_mnt) goto skip_it; @@ -660,7 +660,7 @@ int audit_add_tree_rule(struct audit_krule *rule) err = kern_path(tree->pathname, 0, &path); if (err) goto Err; - mnt = collect_mounts(path.mnt, path.dentry); + mnt = collect_mounts(&path); path_put(&path); if (!mnt) { err = -ENOMEM; @@ -720,7 +720,7 @@ int audit_tag_tree(char *old, char *new) err = kern_path(new, 0, &path); if (err) return err; - tagged = collect_mounts(path.mnt, path.dentry); + tagged = collect_mounts(&path); path_put(&path); if (!tagged) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 9393bd07cf218ca51d0e627653f906a9d76a9131 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 13:58:15 -0400 Subject: switch follow_down() Signed-off-by: Al Viro --- fs/afs/mntpt.c | 2 +- fs/autofs/dirhash.c | 5 ++--- fs/autofs4/autofs_i.h | 6 +++--- fs/autofs4/dev-ioctl.c | 2 +- fs/autofs4/expire.c | 15 +++++++-------- fs/autofs4/root.c | 7 +++---- fs/cifs/cifs_dfs_ref.c | 2 +- fs/namei.c | 12 ++++++------ fs/namespace.c | 4 ++-- fs/nfs/namespace.c | 2 +- fs/nfsd/vfs.c | 3 +-- include/linux/namei.h | 2 +- 12 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2b9e2d03a39..c52be53f694 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 4eb4d8dfb2f..2316e944a10 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, } path.mnt = mnt; path_get(&path); - if (!follow_down(&path.mnt, &path.dentry)) { + if (!follow_down(&path)) { path_put(&path); DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(path.dentry) && - follow_down(&path.mnt, &path.dentry)) + while (d_mountpoint(path.dentry) && follow_down(&path)); ; umount_ok = may_umount(path.mnt); path_put(&path); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index b7ff33c6310..8f7cdde4173 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); -static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static inline int autofs4_follow_mount(struct path *path) { int res = 0; - while (d_mountpoint(*dentry)) { - int followed = follow_down(mnt, dentry); + while (d_mountpoint(path->dentry)) { + int followed = follow_down(path); if (!followed) break; res = 1; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 670407576b2..f3da2eb51f5 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -562,7 +562,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, err = have_submounts(path.dentry); if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) { - if (follow_down(&path.mnt, &path.dentry)) + if (follow_down(&path)) magic = path.mnt->mnt_sb->s_magic; } } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 3077d8f1652..aa39ae83f01 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry, static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { struct dentry *top = dentry; + struct path path = {.mnt = mnt, .dentry = dentry}; int status = 1; DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - mntget(mnt); - dget(dentry); + path_get(&path); - if (!follow_down(&mnt, &dentry)) + if (!follow_down(&path)) goto done; - if (is_autofs4_dentry(dentry)) { - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + if (is_autofs4_dentry(path.dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) @@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) * Otherwise it's an offset mount and we need to check * if we can umount its mount, if there is one. */ - if (!d_mountpoint(dentry)) { + if (!d_mountpoint(path.dentry)) { status = 0; goto done; } @@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) status = 0; done: DPRINTK("returning = %d", status); - dput(dentry); - mntput(mnt); + path_put(&path); return status; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e383bf0334f..b96a3c57359 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) nd->flags); /* * For an expire of a covered direct or offset mount we need - * to beeak out of follow_down() at the autofs mount trigger + * to break out of follow_down() at the autofs mount trigger * (d_mounted--), so we can see the expiring flag, and manage * the blocking and following here until the expire is completed. */ @@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); /* Follow down to our covering mount. */ - if (!follow_down(&nd->path.mnt, &nd->path.dentry)) + if (!follow_down(&nd->path)) goto done; goto follow; } @@ -230,8 +230,7 @@ follow: * to follow it. */ if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->path.mnt, - &nd->path.dentry)) { + if (!autofs4_follow_mount(&nd->path)) { status = -ENOENT; goto out_error; } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 83d62759c7c..3bb11be8b6a 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/namei.c b/fs/namei.c index 8c1f48ae68e..4d49a3eee6d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -731,16 +731,16 @@ static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) /* no need for dcache_lock, as serialization is taken care in * namespace.c */ -int follow_down(struct vfsmount **mnt, struct dentry **dentry) +int follow_down(struct path *path) { struct vfsmount *mounted; - mounted = lookup_mnt(*mnt, *dentry); + mounted = lookup_mnt(path->mnt, path->dentry); if (mounted) { - dput(*dentry); - mntput(*mnt); - *mnt = mounted; - *dentry = dget(mounted->mnt_root); + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); return 1; } return 0; diff --git a/fs/namespace.c b/fs/namespace.c index c85962206aa..ba5237be1cf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1601,7 +1601,7 @@ static int do_move_mount(struct path *path, char *old_name) down_write(&namespace_sem); while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) @@ -1695,7 +1695,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, down_write(&namespace_sem); /* Something was mounted here while we slept */ while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 64a288ee046..f01caec8446 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -154,7 +154,7 @@ out_err: goto out; out_follow: while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7b2b3f77532..99f83575359 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -105,8 +105,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, .dentry = dget(dentry)}; int err = 0; - while (follow_down(&path.mnt, &path.dentry) && - d_mountpoint(path.dentry)) + while (d_mountpoint(path.dentry) && follow_down(&path)) ; exp2 = rqst_exp_get_by_name(rqstp, &path); diff --git a/include/linux/namei.h b/include/linux/namei.h index 9cd5a717be3..d870ae2faed 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -78,7 +78,7 @@ extern void release_open_intent(struct nameidata *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); -extern int follow_down(struct vfsmount **, struct dentry **); +extern int follow_down(struct path *); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3-70-g09d2 From 1c755af4df75996b0dd4b7e6cacaf9d57a6ef2ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Apr 2009 14:06:57 -0400 Subject: switch lookup_mnt() Signed-off-by: Al Viro --- fs/namei.c | 6 +++--- fs/namespace.c | 4 ++-- include/linux/dcache.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index c006bc61d1e..527119afb6a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -702,7 +702,7 @@ static int __follow_mount(struct path *path) { int res = 0; while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; dput(path->dentry); @@ -718,7 +718,7 @@ static int __follow_mount(struct path *path) static void follow_mount(struct path *path) { while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; dput(path->dentry); @@ -735,7 +735,7 @@ int follow_down(struct path *path) { struct vfsmount *mounted; - mounted = lookup_mnt(path->mnt, path->dentry); + mounted = lookup_mnt(path); if (mounted) { dput(path->dentry); mntput(path->mnt); diff --git a/fs/namespace.c b/fs/namespace.c index ba5237be1cf..b94ad3d685f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -442,11 +442,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, * lookup_mnt increments the ref count before returning * the vfsmount struct. */ -struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; spin_lock(&vfsmount_lock); - if ((child_mnt = __lookup_mnt(mnt, dentry, 1))) + if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); spin_unlock(&vfsmount_lock); return child_mnt; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 97978004338..72ce2ae8859 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -370,7 +370,7 @@ static inline int d_mountpoint(struct dentry *dentry) return dentry->d_mounted; } -extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *); +extern struct vfsmount *lookup_mnt(struct path *); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; -- cgit v1.2.3-70-g09d2 From 3174c21b74b56c6a53fddd41a30fd6f757a32bd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2009 13:19:18 -0400 Subject: Move junk from proc_fs.h to fs/proc/internal.h Signed-off-by: Al Viro --- fs/proc/internal.h | 25 +++++++++++++++++++++++++ fs/proc/proc_devtree.c | 1 + include/linux/proc_fs.h | 24 ------------------------ 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index f6db9618a88..753ca37002c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -92,3 +92,28 @@ struct pde_opener { struct list_head lh; }; void pde_users_dec(struct proc_dir_entry *pde); + +extern spinlock_t proc_subdir_lock; + +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); +unsigned long task_vsize(struct mm_struct *); +int task_statm(struct mm_struct *, int *, int *, int *, int *); +void task_mem(struct seq_file *, struct mm_struct *); + +struct proc_dir_entry *de_get(struct proc_dir_entry *de); +void de_put(struct proc_dir_entry *de); + +extern struct vfsmount *proc_mnt; +int proc_fill_super(struct super_block *); +struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); + +/* + * These are generic /proc routines that use the internal + * "struct proc_dir_entry" tree to traverse the filesystem. + * + * The /proc root directory has extended versions to take care + * of the /proc/ subdirectories. + */ +int proc_readdir(struct file *, void *, filldir_t); +struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de2bba5a344..fc6c3025bef 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -11,6 +11,7 @@ #include #include #include +#include "internal.h" #ifndef HAVE_ARCH_DEVTREE_FIXUPS static inline void set_node_proc_entry(struct device_node *np, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index fbfa3d44d33..e6e77d31c41 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -93,20 +93,9 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern spinlock_t proc_subdir_lock; - extern void proc_root_init(void); void proc_flush_task(struct task_struct *task); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); -unsigned long task_vsize(struct mm_struct *); -int task_statm(struct mm_struct *, int *, int *, int *, int *); -void task_mem(struct seq_file *, struct mm_struct *); -void clear_refs_smap(struct mm_struct *mm); - -struct proc_dir_entry *de_get(struct proc_dir_entry *de); -void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); @@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); -extern struct vfsmount *proc_mnt; struct pid_namespace; -extern int proc_fill_super(struct super_block *); -extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); - -/* - * These are generic /proc routines that use the internal - * "struct proc_dir_entry" tree to traverse the filesystem. - * - * The /proc root directory has extended versions to take care - * of the /proc/ subdirectories. - */ -extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); -- cgit v1.2.3-70-g09d2 From d3ef3d7351ccfbef3e5d926efc5ee332136f40d4 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:54 +1000 Subject: fs: mnt_want_write speedup This patch speeds up lmbench lat_mmap test by about 8%. lat_mmap is set up basically to mmap a 64MB file on tmpfs, fault in its pages, then unmap it. A microbenchmark yes, but it exercises some important paths in the mm. Before: avg = 501.9 std = 14.7773 After: avg = 462.286 std = 5.46106 (50 runs of each, stddev gives a reasonable confidence, but there is quite a bit of variation there still) It does this by removing the complex per-cpu locking and counter-cache and replaces it with a percpu counter in struct vfsmount. This makes the code much simpler, and avoids spinlocks (although the msync is still pretty costly, unfortunately). It results in about 900 bytes smaller code too. It does increase the size of a vfsmount, however. It should also give a speedup on large systems if CPUs are frequently operating on different mounts (because the existing scheme has to operate on an atomic in the struct vfsmount when switching between mounts). But I'm most interested in the single threaded path performance for the moment. [AV: minor cleanup] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/namespace.c | 268 +++++++++++++++++--------------------------------- include/linux/mount.h | 21 ++-- 2 files changed, 106 insertions(+), 183 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index b94ad3d685f..22ae06ad751 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); - atomic_set(&mnt->__mnt_writers, 0); +#ifdef CONFIG_SMP + mnt->mnt_writers = alloc_percpu(int); + if (!mnt->mnt_writers) + goto out_free_devname; +#else + mnt->mnt_writers = 0; +#endif } return mnt; +#ifdef CONFIG_SMP +out_free_devname: + kfree(mnt->mnt_devname); +#endif out_free_id: mnt_free_id(mnt); out_free_cache: @@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt) } EXPORT_SYMBOL_GPL(__mnt_is_readonly); -struct mnt_writer { - /* - * If holding multiple instances of this lock, they - * must be ordered by cpu number. - */ - spinlock_t lock; - struct lock_class_key lock_class; /* compiles out with !lockdep */ - unsigned long count; - struct vfsmount *mnt; -} ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); +static inline void inc_mnt_writers(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; +#else + mnt->mnt_writers++; +#endif +} -static int __init init_mnt_writers(void) +static inline void dec_mnt_writers(struct vfsmount *mnt) { - int cpu; - for_each_possible_cpu(cpu) { - struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); - spin_lock_init(&writer->lock); - lockdep_set_class(&writer->lock, &writer->lock_class); - writer->count = 0; - } - return 0; +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; +#else + mnt->mnt_writers--; +#endif } -fs_initcall(init_mnt_writers); -static void unlock_mnt_writers(void) +static unsigned int count_mnt_writers(struct vfsmount *mnt) { +#ifdef CONFIG_SMP + unsigned int count = 0; int cpu; - struct mnt_writer *cpu_writer; for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_unlock(&cpu_writer->lock); + count += *per_cpu_ptr(mnt->mnt_writers, cpu); } -} -static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) -{ - if (!cpu_writer->mnt) - return; - /* - * This is in case anyone ever leaves an invalid, - * old ->mnt and a count of 0. - */ - if (!cpu_writer->count) - return; - atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); - cpu_writer->count = 0; -} - /* - * must hold cpu_writer->lock - */ -static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, - struct vfsmount *mnt) -{ - if (cpu_writer->mnt == mnt) - return; - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = mnt; + return count; +#else + return mnt->mnt_writers; +#endif } /* @@ -253,75 +236,34 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, int mnt_want_write(struct vfsmount *mnt) { int ret = 0; - struct mnt_writer *cpu_writer; - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); + preempt_disable(); + inc_mnt_writers(mnt); + /* + * The store to inc_mnt_writers must be visible before we pass + * MNT_WRITE_HOLD loop below, so that the slowpath can see our + * incremented count after it has set MNT_WRITE_HOLD. + */ + smp_mb(); + while (mnt->mnt_flags & MNT_WRITE_HOLD) + cpu_relax(); + /* + * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will + * be set to match its requirements. So we must not load that until + * MNT_WRITE_HOLD is cleared. + */ + smp_rmb(); if (__mnt_is_readonly(mnt)) { + dec_mnt_writers(mnt); ret = -EROFS; goto out; } - use_cpu_writer_for_mount(cpu_writer, mnt); - cpu_writer->count++; out: - spin_unlock(&cpu_writer->lock); - put_cpu_var(mnt_writers); + preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); -static void lock_mnt_writers(void) -{ - int cpu; - struct mnt_writer *cpu_writer; - - for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = NULL; - } -} - -/* - * These per-cpu write counts are not guaranteed to have - * matched increments and decrements on any given cpu. - * A file open()ed for write on one cpu and close()d on - * another cpu will imbalance this count. Make sure it - * does not get too far out of whack. - */ -static void handle_write_count_underflow(struct vfsmount *mnt) -{ - if (atomic_read(&mnt->__mnt_writers) >= - MNT_WRITER_UNDERFLOW_LIMIT) - return; - /* - * It isn't necessary to hold all of the locks - * at the same time, but doing it this way makes - * us share a lot more code. - */ - lock_mnt_writers(); - /* - * vfsmount_lock is for mnt_flags. - */ - spin_lock(&vfsmount_lock); - /* - * If coalescing the per-cpu writer counts did not - * get us back to a positive writer count, we have - * a bug. - */ - if ((atomic_read(&mnt->__mnt_writers) < 0) && - !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { - WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " - "count: %d\n", - mnt, atomic_read(&mnt->__mnt_writers)); - /* use the flag to keep the dmesg spam down */ - mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; - } - spin_unlock(&vfsmount_lock); - unlock_mnt_writers(); -} - /** * mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access @@ -332,37 +274,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) */ void mnt_drop_write(struct vfsmount *mnt) { - int must_check_underflow = 0; - struct mnt_writer *cpu_writer; - - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); - - use_cpu_writer_for_mount(cpu_writer, mnt); - if (cpu_writer->count > 0) { - cpu_writer->count--; - } else { - must_check_underflow = 1; - atomic_dec(&mnt->__mnt_writers); - } - - spin_unlock(&cpu_writer->lock); - /* - * Logically, we could call this each time, - * but the __mnt_writers cacheline tends to - * be cold, and makes this expensive. - */ - if (must_check_underflow) - handle_write_count_underflow(mnt); - /* - * This could be done right after the spinlock - * is taken because the spinlock keeps us on - * the cpu, and disables preemption. However, - * putting it here bounds the amount that - * __mnt_writers can underflow. Without it, - * we could theoretically wrap __mnt_writers. - */ - put_cpu_var(mnt_writers); + preempt_disable(); + dec_mnt_writers(mnt); + preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_drop_write); @@ -370,24 +284,41 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - lock_mnt_writers(); + spin_lock(&vfsmount_lock); + mnt->mnt_flags |= MNT_WRITE_HOLD; /* - * With all the locks held, this value is stable + * After storing MNT_WRITE_HOLD, we'll read the counters. This store + * should be visible before we do. */ - if (atomic_read(&mnt->__mnt_writers) > 0) { - ret = -EBUSY; - goto out; - } + smp_mb(); + /* - * nobody can do a successful mnt_want_write() with all - * of the counts in MNT_DENIED_WRITE and the locks held. + * With writers on hold, if this value is zero, then there are + * definitely no active writers (although held writers may subsequently + * increment the count, they'll have to wait, and decrement it after + * seeing MNT_READONLY). + * + * It is OK to have counter incremented on one CPU and decremented on + * another: the sum will add up correctly. The danger would be when we + * sum up each counter, if we read a counter before it is incremented, + * but then read another CPU's count which it has been subsequently + * decremented from -- we would see more decrements than we should. + * MNT_WRITE_HOLD protects against this scenario, because + * mnt_want_write first increments count, then smp_mb, then spins on + * MNT_WRITE_HOLD, so it can't be decremented by another CPU while + * we're counting up here. */ - spin_lock(&vfsmount_lock); - if (!ret) + if (count_mnt_writers(mnt) > 0) + ret = -EBUSY; + else mnt->mnt_flags |= MNT_READONLY; + /* + * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers + * that become unheld will see MNT_READONLY. + */ + smp_wmb(); + mnt->mnt_flags &= ~MNT_WRITE_HOLD; spin_unlock(&vfsmount_lock); -out: - unlock_mnt_writers(); return ret; } @@ -410,6 +341,9 @@ void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); mnt_free_id(mnt); +#ifdef CONFIG_SMP + free_percpu(mnt->mnt_writers); +#endif kmem_cache_free(mnt_cache, mnt); } @@ -604,38 +538,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, static inline void __mntput(struct vfsmount *mnt) { - int cpu; struct super_block *sb = mnt->mnt_sb; - /* - * We don't have to hold all of the locks at the - * same time here because we know that we're the - * last reference to mnt and that no new writers - * can come in. - */ - for_each_possible_cpu(cpu) { - struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - if (cpu_writer->mnt != mnt) { - spin_unlock(&cpu_writer->lock); - continue; - } - atomic_add(cpu_writer->count, &mnt->__mnt_writers); - cpu_writer->count = 0; - /* - * Might as well do this so that no one - * ever sees the pointer and expects - * it to be valid. - */ - cpu_writer->mnt = NULL; - spin_unlock(&cpu_writer->lock); - } /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this * happens, the filesystem was probably unable * to make r/w->r/o transitions. */ - WARN_ON(atomic_read(&mnt->__mnt_writers)); + /* + * atomic_dec_and_lock() used to deal with ->mnt_count decrements + * provides barriers, so count_mnt_writers() below is safe. AV + */ + WARN_ON(count_mnt_writers(mnt)); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); diff --git a/include/linux/mount.h b/include/linux/mount.h index 51f55f903af..ac49c1f8e5c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,7 +30,7 @@ struct mnt_namespace; #define MNT_STRICTATIME 0x80 #define MNT_SHRINKABLE 0x100 -#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */ +#define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ @@ -65,13 +65,22 @@ struct vfsmount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ - atomic_t __mnt_writers; +#ifdef CONFIG_SMP + int *mnt_writers; +#else + int mnt_writers; +#endif }; +static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + return mnt->mnt_writers; +#else + return &mnt->mnt_writers; +#endif +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) -- cgit v1.2.3-70-g09d2 From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:55 +1000 Subject: fs: introduce mnt_clone_write This patch speeds up lmbench lat_mmap test by about another 2% after the first patch. Before: avg = 462.286 std = 5.46106 After: avg = 453.12 std = 9.58257 (50 runs of each, stddev gives a reasonable confidence) It does this by introducing mnt_clone_write, which avoids some heavyweight operations of mnt_want_write if called on a vfsmount which we know already has a write count; and mnt_want_write_file, which can call mnt_clone_write if the file is open for write. After these two patches, mnt_want_write and mnt_drop_write go from 7% on the profile down to 1.3% (including mnt_clone_write). [AV: mnt_want_write_file() should take file alone and derive mnt from it; not only all callers have that form, but that's the only mnt about which we know that it's already held for write if file is opened for write] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/open.c | 4 ++-- fs/xattr.c | 4 ++-- include/linux/mount.h | 4 ++++ 6 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe4884..3d66dbcebef 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { file_take_write(file); - error = mnt_want_write(mnt); + error = mnt_clone_write(mnt); WARN_ON(error); } return error; diff --git a/fs/inode.c b/fs/inode.c index ca337014ae2..a88baebf77c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1422,7 +1422,7 @@ void file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return; - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return; diff --git a/fs/namespace.c b/fs/namespace.c index 22ae06ad751..120b8a6b99e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -264,6 +264,46 @@ out: } EXPORT_SYMBOL_GPL(mnt_want_write); +/** + * mnt_clone_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This is effectively like mnt_want_write, except + * it must only be used to take an extra write reference + * on a mountpoint that we already know has a write reference + * on it. This allows some optimisation. + * + * After finished, mnt_drop_write must be called as usual to + * drop the reference. + */ +int mnt_clone_write(struct vfsmount *mnt) +{ + /* superblock may be r/o */ + if (__mnt_is_readonly(mnt)) + return -EROFS; + preempt_disable(); + inc_mnt_writers(mnt); + preempt_enable(); + return 0; +} +EXPORT_SYMBOL_GPL(mnt_clone_write); + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + */ +int mnt_want_write_file(struct file *file) +{ + if (!(file->f_mode & FMODE_WRITE)) + return mnt_want_write(file->f_path.mnt); + else + return mnt_clone_write(file->f_path.mnt); +} +EXPORT_SYMBOL_GPL(mnt_want_write_file); + /** * mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access diff --git a/fs/open.c b/fs/open.c index bdfbf03615a..7200e23d925 100644 --- a/fs/open.c +++ b/fs/open.c @@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) audit_inode(NULL, dentry); - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out_putf; mutex_lock(&inode->i_mutex); @@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) if (!file) goto out; - error = mnt_want_write(file->f_path.mnt); + error = mnt_want_write_file(file); if (error) goto out_fput; dentry = file->f_path.dentry; diff --git a/fs/xattr.c b/fs/xattr.c index d51b8f9db92..1c3d0af59dd 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = setxattr(dentry, name, value, size, flags); mnt_drop_write(f->f_path.mnt); @@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = removexattr(dentry, name); mnt_drop_write(f->f_path.mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index ac49c1f8e5c..5d527536486 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); -- cgit v1.2.3-70-g09d2 From 876a9f76abbcb775f8d21cbc99fa161f9e5937f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Apr 2009 18:05:55 +0200 Subject: remove s_async_list Remove the unused s_async_list in the superblock, a leftover of the broken async inode deletion code that leaked into mainline. Having this in the middle of the sync/unmount path is not helpful for the following cleanups. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 8 -------- include/linux/fs.h | 5 ----- 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index c170551c23f..3d9f117dd2a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include "internal.h" @@ -72,7 +71,6 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_async_list); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -342,11 +340,6 @@ void generic_shutdown_super(struct super_block *sb) lock_super(sb); sb->s_flags &= ~MS_ACTIVE; - /* - * wait for asynchronous fs operations to finish before going further - */ - async_synchronize_full_domain(&sb->s_async_list); - /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); lock_kernel(); @@ -517,7 +510,6 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - async_synchronize_full_domain(&sb->s_async_list); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); up_read(&sb->s_umount); diff --git a/include/linux/fs.h b/include/linux/fs.h index 03fb2102b8f..36bcff7036e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1372,11 +1372,6 @@ struct super_block { * generic_show_options() */ char *s_options; - - /* - * storage for asynchronous operations - */ - struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3-70-g09d2 From 429479f031322a0cc5c921ffb2321a51718dc875 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:50 +0200 Subject: vfs: Make __fsync_super() a static function (version 4) __fsync_super() does the same thing as fsync_super(). So change the only caller to use fsync_super() and make __fsync_super() static. This removes unnecessarily duplicated call to sync_blockdev() and prepares ground for the changes to __fsync_super() in the following patches. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/block_dev.c | 2 +- fs/super.c | 7 +++---- include/linux/fs.h | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 931f6b8c4b2..fe47f722761 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -241,7 +241,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - __fsync_super(sb); + fsync_super(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); diff --git a/fs/super.c b/fs/super.c index fae91ba38e4..8dbe1ead9dd 100644 --- a/fs/super.c +++ b/fs/super.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL(unlock_super); * device. Takes the superblock lock. Requires a second blkdev * flush by the caller to complete the operation. */ -void __fsync_super(struct super_block *sb) +static int __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); vfs_dq_sync(sb); @@ -300,7 +300,7 @@ void __fsync_super(struct super_block *sb) unlock_super(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + return sync_blockdev(sb->s_bdev); } /* @@ -310,8 +310,7 @@ void __fsync_super(struct super_block *sb) */ int fsync_super(struct super_block *sb) { - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); + return __fsync_super(sb); } EXPORT_SYMBOL_GPL(fsync_super); diff --git a/include/linux/fs.h b/include/linux/fs.h index 36bcff7036e..41a9907f342 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2078,7 +2078,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, -- cgit v1.2.3-70-g09d2 From 5cee5815d1564bbbd505fea86f4550f1efdb5cd0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:51 +0200 Subject: vfs: Make sys_sync() use fsync_super() (version 4) It is unnecessarily fragile to have two places (fsync_super() and do_sync()) doing data integrity sync of the filesystem. Alter __fsync_super() to accommodate needs of both callers and use it. So after this patch __fsync_super() is the only place where we gather all the calls needed to properly send all data on a filesystem to disk. Nice bonus is that we get a complete livelock avoidance and write_supers() is now only used for periodic writeback of superblocks. sync_blockdevs() introduced a couple of patches ago is gone now. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/block_dev.c | 15 ++++++---- fs/fs-writeback.c | 49 -------------------------------- fs/internal.h | 16 +++++------ fs/super.c | 72 +++++++++++++++-------------------------------- fs/sync.c | 31 +++++++------------- include/linux/fs.h | 2 +- include/linux/writeback.h | 1 - 7 files changed, 51 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index fe47f722761..4b6a3b9d01e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } +int __sync_blockdev(struct block_device *bdev, int wait) +{ + if (!bdev) + return 0; + if (!wait) + return filemap_flush(bdev->bd_inode->i_mapping); + return filemap_write_and_wait(bdev->bd_inode->i_mapping); +} + /* * Write out and wait upon all the dirty data associated with a block * device via its mapping. Does not take the superblock lock. */ int sync_blockdev(struct block_device *bdev) { - int ret = 0; - - if (bdev) - ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); - return ret; + return __sync_blockdev(bdev, 1); } EXPORT_SYMBOL(sync_blockdev); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 91013ff7dd5..e0fb2e78959 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -678,55 +678,6 @@ void sync_inodes_sb(struct super_block *sb, int wait) sync_sb_inodes(sb, &wbc); } -/** - * sync_inodes - writes all inodes to disk - * @wait: wait for completion - * - * sync_inodes() goes through each super block's dirty inode list, writes the - * inodes out, waits on the writeout and puts the inodes back on the normal - * list. - * - * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle - * part of the sync functions is that the blockdev "superblock" is processed - * last. This is because the write_inode() function of a typical fs will - * perform no I/O, but will mark buffers in the blockdev mapping as dirty. - * What we want to do is to perform all that dirtying first, and then write - * back all those inode blocks via the blockdev mapping in one sweep. So the - * additional (somewhat redundant) sync_blockdev() calls here are to make - * sure that really happens. Because if we call sync_inodes_sb(wait=1) with - * outstanding dirty inodes, the writeback goes block-at-a-time within the - * filesystem's write_inode(). This is extremely slow. - */ -static void __sync_inodes(int wait) -{ - struct super_block *sb; - - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) { - sync_inodes_sb(sb, wait); - sync_blockdev(sb->s_bdev); - } - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); -} - -void sync_inodes(int wait) -{ - __sync_inodes(0); - - if (wait) - __sync_inodes(1); -} - /** * write_inode_now - write an inode to disk * @inode: inode to write to disk diff --git a/fs/internal.h b/fs/internal.h index 343a537ab80..dbec3cc2833 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) return sb == blockdev_superblock; } +extern int __sync_blockdev(struct block_device *bdev, int wait); + #else static inline void bdev_cache_init(void) { @@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) { return 0; } + +static inline int __sync_blockdev(struct block_device *bdev, int wait) +{ + return 0; +} #endif /* @@ -71,12 +78,3 @@ extern void chroot_fs_refs(struct path *, struct path *); * file_table.c */ extern void mark_files_ro(struct super_block *); - -/* - * super.c - */ -#ifdef CONFIG_BLOCK -extern void sync_blockdevs(void); -#else -static inline void sync_blockdevs(void) { } -#endif diff --git a/fs/super.c b/fs/super.c index 8dbe1ead9dd..c8ce5ed0424 100644 --- a/fs/super.c +++ b/fs/super.c @@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); /* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. Requires a second blkdev - * flush by the caller to complete the operation. + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) + * just dirties buffers with inodes so we have to submit IO for these buffers + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that + * case write_inode() functions do sync_dirty_buffer() and thus effectively + * write one block at a time. */ -static int __fsync_super(struct super_block *sb) +static int __fsync_super(struct super_block *sb, int wait) { - sync_inodes_sb(sb, 0); vfs_dq_sync(sb); - sync_inodes_sb(sb, 1); + sync_inodes_sb(sb, wait); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - return sync_blockdev(sb->s_bdev); + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); } /* @@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb) */ int fsync_super(struct super_block *sb) { - return __fsync_super(sb); + int ret; + + ret = __fsync_super(sb, 0); + if (ret < 0) + return ret; + return __fsync_super(sb, 1); } EXPORT_SYMBOL_GPL(fsync_super); @@ -469,20 +474,18 @@ restart: } /* - * Call the ->sync_fs super_op against all filesystems which are r/w and - * which implement it. + * Sync all the data for all the filesystems (called by sys_sync() and + * emergency sync) * * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync_fs + * if two or more filesystems are being continuously dirtied. s_need_sync * is used only here. We set it against all filesystems and then clear it as * we sync them. So redirtied filesystems are skipped. * * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync_fs + * calls sync_filesystems as well, process B will set all the s_need_sync * flags again, which will cause process A to resync everything. Fix that with * a local mutex. - * - * (Fabian) Avoid sync_fs with clean fs & wait mode 0 */ void sync_filesystems(int wait) { @@ -492,25 +495,23 @@ void sync_filesystems(int wait) mutex_lock(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_op->sync_fs) - continue; if (sb->s_flags & MS_RDONLY) continue; - sb->s_need_sync_fs = 1; + sb->s_need_sync = 1; } restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync_fs) + if (!sb->s_need_sync) continue; - sb->s_need_sync_fs = 0; + sb->s_need_sync = 0; if (sb->s_flags & MS_RDONLY) continue; /* hm. Was remounted r/o meanwhile */ sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); if (sb->s_root) - sb->s_op->sync_fs(sb, wait); + __fsync_super(sb, wait); up_read(&sb->s_umount); /* restart only when sb is no longer on the list */ spin_lock(&sb_lock); @@ -521,33 +522,6 @@ restart: mutex_unlock(&mutex); } -#ifdef CONFIG_BLOCK -/* - * Sync all block devices underlying some superblock - */ -void sync_blockdevs(void) -{ - struct super_block *sb; - - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_bdev) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) - sync_blockdev(sb->s_bdev); - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); -} -#endif - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for diff --git a/fs/sync.c b/fs/sync.c index 631fd5aece7..be0798cc33d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -18,35 +18,24 @@ #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) -/* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. - */ -static void do_sync(unsigned long wait) +SYSCALL_DEFINE0(sync) { - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - vfs_dq_sync(NULL); - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_blockdevs(); - if (!wait) - printk("Emergency Sync complete\n"); + sync_filesystems(0); + sync_filesystems(1); if (unlikely(laptop_mode)) laptop_sync_completion(); -} - -SYSCALL_DEFINE0(sync) -{ - do_sync(1); return 0; } static void do_sync_work(struct work_struct *work) { - do_sync(0); + /* + * Sync twice to reduce the possibility we skipped some inodes / pages + * because they were temporarily locked + */ + sync_filesystems(0); + sync_filesystems(0); + printk("Emergency Sync complete\n"); kfree(work); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 41a9907f342..f00df653cf2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1321,7 +1321,7 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_need_sync_fs; + int s_need_sync; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 93445477f86..3224820c851 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,7 +79,6 @@ struct writeback_control { void writeback_inodes(struct writeback_control *wbc); int inode_wait(void *); void sync_inodes_sb(struct super_block *, int wait); -void sync_inodes(int wait); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) -- cgit v1.2.3-70-g09d2 From c15c54f5f056ee4819da9fde59a5f2cd45445f23 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:52 +0200 Subject: vfs: Move syncing code from super.c to sync.c (version 4) Move sync_filesystems(), __fsync_super(), fsync_super() from super.c to sync.c where it fits better. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 85 ------------------------------------------------------ fs/sync.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 3 +- 3 files changed, 86 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index c8ce5ed0424..f822c74f25b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -283,42 +283,6 @@ void unlock_super(struct super_block * sb) EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); -/* - * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) - * just dirties buffers with inodes so we have to submit IO for these buffers - * via __sync_blockdev(). This also speeds up the wait == 1 case since in that - * case write_inode() functions do sync_dirty_buffer() and thus effectively - * write one block at a time. - */ -static int __fsync_super(struct super_block *sb, int wait) -{ - vfs_dq_sync(sb); - sync_inodes_sb(sb, wait); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, wait); - return __sync_blockdev(sb->s_bdev, wait); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - int ret; - - ret = __fsync_super(sb, 0); - if (ret < 0) - return ret; - return __fsync_super(sb, 1); -} -EXPORT_SYMBOL_GPL(fsync_super); - /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -473,55 +437,6 @@ restart: spin_unlock(&sb_lock); } -/* - * Sync all the data for all the filesystems (called by sys_sync() and - * emergency sync) - * - * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync - * is used only here. We set it against all filesystems and then clear it as - * we sync them. So redirtied filesystems are skipped. - * - * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync - * flags again, which will cause process A to resync everything. Fix that with - * a local mutex. - */ -void sync_filesystems(int wait) -{ - struct super_block *sb; - static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); /* Could be down_interruptible */ - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_flags & MS_RDONLY) - continue; - sb->s_need_sync = 1; - } - -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync) - continue; - sb->s_need_sync = 0; - if (sb->s_flags & MS_RDONLY) - continue; /* hm. Was remounted r/o meanwhile */ - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) - __fsync_super(sb, wait); - up_read(&sb->s_umount); - /* restart only when sb is no longer on the list */ - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); - mutex_unlock(&mutex); -} - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for diff --git a/fs/sync.c b/fs/sync.c index be0798cc33d..d5fa7b79982 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -18,6 +18,91 @@ #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) +/* + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) + * just dirties buffers with inodes so we have to submit IO for these buffers + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that + * case write_inode() functions do sync_dirty_buffer() and thus effectively + * write one block at a time. + */ +static int __fsync_super(struct super_block *sb, int wait) +{ + vfs_dq_sync(sb); + sync_inodes_sb(sb, wait); + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_super(struct super_block *sb) +{ + int ret; + + ret = __fsync_super(sb, 0); + if (ret < 0) + return ret; + return __fsync_super(sb, 1); +} +EXPORT_SYMBOL_GPL(fsync_super); + +/* + * Sync all the data for all the filesystems (called by sys_sync() and + * emergency sync) + * + * This operation is careful to avoid the livelock which could easily happen + * if two or more filesystems are being continuously dirtied. s_need_sync + * is used only here. We set it against all filesystems and then clear it as + * we sync them. So redirtied filesystems are skipped. + * + * But if process A is currently running sync_filesystems and then process B + * calls sync_filesystems as well, process B will set all the s_need_sync + * flags again, which will cause process A to resync everything. Fix that with + * a local mutex. + */ +static void sync_filesystems(int wait) +{ + struct super_block *sb; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); /* Could be down_interruptible */ + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_flags & MS_RDONLY) + continue; + sb->s_need_sync = 1; + } + +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_need_sync) + continue; + sb->s_need_sync = 0; + if (sb->s_flags & MS_RDONLY) + continue; /* hm. Was remounted r/o meanwhile */ + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + __fsync_super(sb, wait); + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + mutex_unlock(&mutex); +} + SYSCALL_DEFINE0(sync) { sync_filesystems(0); diff --git a/include/linux/fs.h b/include/linux/fs.h index f00df653cf2..d3f7159993c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_super(struct super_block *); extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} @@ -1959,6 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif +extern int fsync_super(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; @@ -2077,7 +2077,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); -extern void sync_filesystems(int wait); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, -- cgit v1.2.3-70-g09d2 From 60b0680fa236ac4e17ce31a50048c9d75f9ec831 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:53 +0200 Subject: vfs: Rename fsync_super() to sync_filesystem() (version 4) Rename the function so that it better describe what it really does. Also remove the unnecessary include of buffer_head.h. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/block_dev.c | 4 ++-- fs/cachefiles/interface.c | 2 +- fs/super.c | 5 ++--- fs/sync.c | 12 ++++++------ include/linux/fs.h | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4b6a3b9d01e..3a6d4fb2a32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -204,7 +204,7 @@ int fsync_bdev(struct block_device *bdev) { struct super_block *sb = get_super(bdev); if (sb) { - int res = fsync_super(sb); + int res = sync_filesystem(sb); drop_super(sb); return res; } @@ -246,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - fsync_super(sb); + sync_filesystem(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 1e962348d11..dafd484d7bd 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -354,7 +354,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) /* make sure all pages pinned by operations on behalf of the netfs are * written to disc */ cachefiles_begin_secure(cache, &saved_cred); - ret = fsync_super(cache->mnt->mnt_sb); + ret = sync_filesystem(cache->mnt->mnt_sb); cachefiles_end_secure(cache, saved_cred); if (ret == -EIO) diff --git a/fs/super.c b/fs/super.c index f822c74f25b..721236e6177 100644 --- a/fs/super.c +++ b/fs/super.c @@ -28,7 +28,6 @@ #include #include #include -#include /* for fsync_super() */ #include #include #include @@ -304,7 +303,7 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); - fsync_super(sb); + sync_filesystem(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; @@ -543,7 +542,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); - fsync_super(sb); + sync_filesystem(sb); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ diff --git a/fs/sync.c b/fs/sync.c index d5fa7b79982..8aa870a4d40 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -25,7 +25,7 @@ * case write_inode() functions do sync_dirty_buffer() and thus effectively * write one block at a time. */ -static int __fsync_super(struct super_block *sb, int wait) +static int __sync_filesystem(struct super_block *sb, int wait) { vfs_dq_sync(sb); sync_inodes_sb(sb, wait); @@ -43,16 +43,16 @@ static int __fsync_super(struct super_block *sb, int wait) * superblock. Filesystem data as well as the underlying block * device. Takes the superblock lock. */ -int fsync_super(struct super_block *sb) +int sync_filesystem(struct super_block *sb) { int ret; - ret = __fsync_super(sb, 0); + ret = __sync_filesystem(sb, 0); if (ret < 0) return ret; - return __fsync_super(sb, 1); + return __sync_filesystem(sb, 1); } -EXPORT_SYMBOL_GPL(fsync_super); +EXPORT_SYMBOL_GPL(sync_filesystem); /* * Sync all the data for all the filesystems (called by sys_sync() and @@ -92,7 +92,7 @@ restart: spin_unlock(&sb_lock); down_read(&sb->s_umount); if (sb->s_root) - __fsync_super(sb, wait); + __sync_filesystem(sb, wait); up_read(&sb->s_umount); /* restart only when sb is no longer on the list */ spin_lock(&sb_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index d3f7159993c..fb1822bed7c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1958,7 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif -extern int fsync_super(struct super_block *); +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; -- cgit v1.2.3-70-g09d2 From 850b201b087f5525a0a7278551c2bcd0423c3b26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Apr 2009 16:43:54 +0200 Subject: quota: cleanup dquota sync functions (version 4) Currently the VFS calls vfs_dq_sync to sync out disk quotas for a given superblock. This is a small wrapper around sync_dquots which for the case of a non-NULL superblock is a small wrapper around quota_sync_sb. Just make quota_sync_sb global (rename it to sync_quota_sb) and call it directly. Also call it directly for those cases in quota.c that have a superblock and leave sync_dquots purely an iterator over sync_quota_sb and remove it's superblock argument. To make this nicer move the check for the lack of a quota_sync method from the callers into sync_quota_sb. [folded build fix from Alexander Beregalov ] Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/quota/quota.c | 25 ++++++++++++++----------- fs/sync.c | 2 +- include/linux/quotaops.h | 11 +++-------- 3 files changed, 18 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b7f5a468f07..95c5b42384b 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, return error; } -static void quota_sync_sb(struct super_block *sb, int type) +#ifdef CONFIG_QUOTA +void sync_quota_sb(struct super_block *sb, int type) { int cnt; + if (!sb->s_qcop->quota_sync) + return; + sb->s_qcop->quota_sync(sb, type); if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) @@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type) } mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); } +#endif -void sync_dquots(struct super_block *sb, int type) +static void sync_dquots(int type) { + struct super_block *sb; int cnt; - if (sb) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - return; - } - spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { @@ -222,8 +222,8 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); + if (sb->s_root) + sync_quota_sb(sb, type); up_read(&sb->s_umount); spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) @@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return sb->s_qcop->set_dqblk(sb, type, id, &idq); } case Q_SYNC: - sync_dquots(sb, type); + if (sb) + sync_quota_sb(sb, type); + else + sync_dquots(type); return 0; case Q_XQUOTAON: diff --git a/fs/sync.c b/fs/sync.c index 8aa870a4d40..d90ab776455 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,7 +27,7 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - vfs_dq_sync(sb); + sync_quota_sb(sb, -1); sync_inodes_sb(sb, wait); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 36353d95c8d..047310fa22f 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,7 +20,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* * declaration of quota_function calls in kernel. */ -void sync_dquots(struct super_block *sb, int type); +void sync_quota_sb(struct super_block *sb, int type); int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -253,12 +253,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) inode->i_sb->dq_op->free_inode(inode, 1); } -/* The following two functions cannot be called inside a transaction */ -static inline void vfs_dq_sync(struct super_block *sb) -{ - sync_dquots(sb, -1); -} - +/* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -334,7 +329,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void vfs_dq_sync(struct super_block *sb) +static inline void sync_quota_sb(struct super_block *sb, int type) { } -- cgit v1.2.3-70-g09d2 From c3f8a40c1cd5591b882497d1d00d43d0e5bb4698 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:55 +0200 Subject: quota: Introduce writeout_quota_sb() (version 4) Introduce this function which just writes all the quota structures but avoids all the syncing and cache pruning work to expose quota structures to userspace. Use this function from __sync_filesystem when wait == 0. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 6 +++++- include/linux/quotaops.h | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/sync.c b/fs/sync.c index d90ab776455..4487b5560dc 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,7 +27,11 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - sync_quota_sb(sb, -1); + /* Avoid doing twice syncing and cache pruning for quota sync */ + if (!wait) + writeout_quota_sb(sb, -1); + else + sync_quota_sb(sb, -1); sync_inodes_sb(sb, wait); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 047310fa22f..7bc45759368 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -21,6 +21,11 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) * declaration of quota_function calls in kernel. */ void sync_quota_sb(struct super_block *sb, int type); +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); +} int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -333,6 +338,10 @@ static inline void sync_quota_sb(struct super_block *sb, int type) { } +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ +} + static inline int vfs_dq_off(struct super_block *sb, int remount) { return 0; -- cgit v1.2.3-70-g09d2 From f3da392e9ff14b9f388e74319e6d195848991c07 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 4 May 2009 03:32:03 +0400 Subject: dcache: extrace and use d_unlinked() d_unlinked() will be used in middle-term to ban checkpointing when opened but unlinked file is detected, and in long term, to detect such situation and special case on it. Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro --- fs/dcache.c | 7 +++---- fs/namespace.c | 8 ++++---- include/linux/dcache.h | 5 +++++ 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 75659a6fd1f..9e5cd3c3a6b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root, spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; @@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) spin_lock(&dcache_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, "//deleted", 9) != 0)) goto Elong; if (buflen < 1) @@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) read_unlock(¤t->fs->lock); error = -ENOENT; - /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { + if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; diff --git a/fs/namespace.c b/fs/namespace.c index 120b8a6b99e..7e537f0393b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1384,7 +1384,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) goto out_unlock; err = -ENOENT; - if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) + if (!d_unlinked(path->dentry)) err = attach_recursive_mnt(mnt, path, NULL); out_unlock: mutex_unlock(&path->dentry->d_inode->i_mutex); @@ -1566,7 +1566,7 @@ static int do_move_mount(struct path *path, char *old_name) if (IS_DEADDIR(path->dentry->d_inode)) goto out1; - if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) + if (d_unlinked(path->dentry)) goto out1; err = -EINVAL; @@ -2129,9 +2129,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = -ENOENT; if (IS_DEADDIR(new.dentry->d_inode)) goto out2; - if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) + if (d_unlinked(new.dentry)) goto out2; - if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) + if (d_unlinked(old.dentry)) goto out2; error = -EBUSY; if (new.mnt == root.mnt || diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 72ce2ae8859..30b93b2a01a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -353,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry) return (dentry->d_flags & DCACHE_UNHASHED); } +static inline int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; -- cgit v1.2.3-70-g09d2 From 62c6943b4b1e818aea60c11c5a68a50785b83119 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 May 2009 03:12:29 -0400 Subject: Trim a bit of crap from fs.h do_remount_sb() is fs/internal.h fodder, fsync_no_super() is long gone. Signed-off-by: Al Viro --- fs/internal.h | 5 +++++ include/linux/fs.h | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/internal.h b/fs/internal.h index dbec3cc2833..d55ef562f0b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -78,3 +78,8 @@ extern void chroot_fs_refs(struct path *, struct path *); * file_table.c */ extern void mark_files_ro(struct super_block *); + +/* + * super.c + */ +extern int do_remount_sb(struct super_block *, int, void *, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index fb1822bed7c..e7833ef5d1d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -2079,8 +2078,6 @@ extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif -- cgit v1.2.3-70-g09d2 From 9fd5746fd3d7838bf6ff991d50f1257057d1156f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 21 May 2009 16:01:00 -0400 Subject: fs: Remove i_cindex from struct inode The only user of the i_cindex element in the inode structure is used is by the firewire drivers. As part of an attempt to slim down the inode structure to save memory --- since a typical Linux system will have hundreds of thousands if not millions of inodes cached, a reduction in the size inode has high leverage. The firewire driver does not need i_cindex in any fast path, so it's simple enough to calculate when it is needed, instead of wasting space in the inode structure. Signed-off-by: "Theodore Ts'o" Cc: krh@redhat.com Cc: stefanr@s5r6.in-berlin.de Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- drivers/ieee1394/dv1394.c | 5 +++-- drivers/ieee1394/ieee1394_core.h | 6 +++++- fs/char_dev.c | 14 +++++++++++++- include/linux/cdev.h | 2 ++ include/linux/fs.h | 1 - 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index 823a6297a1a..2cd00b5b45b 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1789,12 +1789,13 @@ static int dv1394_open(struct inode *inode, struct file *file) } else { /* look up the card by ID */ unsigned long flags; + int idx = ieee1394_file_to_instance(file); spin_lock_irqsave(&dv1394_cards_lock, flags); if (!list_empty(&dv1394_cards)) { struct video_card *p; list_for_each_entry(p, &dv1394_cards, list) { - if ((p->id) == ieee1394_file_to_instance(file)) { + if ((p->id) == idx) { video = p; break; } @@ -1803,7 +1804,7 @@ static int dv1394_open(struct inode *inode, struct file *file) spin_unlock_irqrestore(&dv1394_cards_lock, flags); if (!video) { - debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file)); + debug_printk("dv1394: OHCI card %d not found", idx); return -ENODEV; } diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h index 21d50f73a21..28b9f58bafd 100644 --- a/drivers/ieee1394/ieee1394_core.h +++ b/drivers/ieee1394/ieee1394_core.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "hosts.h" @@ -155,7 +156,10 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size, */ static inline unsigned char ieee1394_file_to_instance(struct file *file) { - return file->f_path.dentry->d_inode->i_cindex; + int idx = cdev_index(file->f_path.dentry->d_inode); + if (idx < 0) + idx = 0; + return idx; } extern int hpsb_disable_irm; diff --git a/fs/char_dev.c b/fs/char_dev.c index 38f71222a55..b7c9d5187a7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp) p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; - inode->i_cindex = idx; list_add(&inode->i_devices, &p->list); new = NULL; } else if (!cdev_get(p)) @@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; } +int cdev_index(struct inode *inode) +{ + int idx; + struct kobject *kobj; + + kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); + if (!kobj) + return -1; + kobject_put(kobj); + return idx; +} + void cd_forget(struct inode *inode) { spin_lock(&cdev_lock); @@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); +EXPORT_SYMBOL(cdev_index); EXPORT_SYMBOL(register_chrdev); EXPORT_SYMBOL(unregister_chrdev); EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/include/linux/cdev.h b/include/linux/cdev.h index fb4591977b0..f389e319a45 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned); void cdev_del(struct cdev *); +int cdev_index(struct inode *inode); + void cd_forget(struct inode *); extern struct backing_dev_info directly_mappable_cdev_bdi; diff --git a/include/linux/fs.h b/include/linux/fs.h index e7833ef5d1d..bcd63706db8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -751,7 +751,6 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; }; - int i_cindex; __u32 i_generation; -- cgit v1.2.3-70-g09d2 From 28ad0c118b0ed98b042d362acfe0017591921138 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 21 May 2009 16:01:02 -0400 Subject: fs: Rearrange inode structure elements to avoid waste due to padding Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bcd63706db8..d883aa1fc2e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -729,8 +729,8 @@ struct inode { struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; blkcnt_t i_blocks; + unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ -- cgit v1.2.3-70-g09d2 From 8688b8635266cf98f00c6b0350ea2dbe7c42c321 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 05:45:04 -0400 Subject: linux/magic.h: move cramfs magic out of cramfs_fs.h Signed-off-by: Mike Frysinger CC: Alexander Viro Signed-off-by: Al Viro --- include/linux/cramfs_fs.h | 3 +-- include/linux/magic.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h index 3be4e5a27d8..6fc2bed368b 100644 --- a/include/linux/cramfs_fs.h +++ b/include/linux/cramfs_fs.h @@ -2,9 +2,8 @@ #define __CRAMFS_H #include +#include -#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ -#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define CRAMFS_SIGNATURE "Compressed ROMFS" /* diff --git a/include/linux/magic.h b/include/linux/magic.h index 927138cf305..1923327b986 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -6,6 +6,8 @@ #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 #define CODA_SUPER_MAGIC 0x73757245 +#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 -- cgit v1.2.3-70-g09d2 From d5aacad548db1ff547adf35d0a77eb2a8ed4fe14 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 14:56:44 -0400 Subject: New helper - simple_fsync() writes associated buffers, then does sync_inode() to write the inode itself (and to make it clean). Depends on ->write_inode() honouring the second argument. Signed-off-by: Al Viro --- fs/libfs.c | 25 +++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index 80046ddf506..ddfa89948c3 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include @@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, } EXPORT_SYMBOL_GPL(generic_fh_to_parent); +int simple_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* metadata-only; caller takes care of data */ + }; + struct inode *inode = dentry->d_inode; + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = sync_inode(inode, &wbc); + if (ret == 0) + ret = err; + return ret; +} +EXPORT_SYMBOL(simple_fsync); + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index d883aa1fc2e..ede84fa7da5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2345,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); +extern int simple_fsync(struct file *, struct dentry *, int); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3-70-g09d2 From 79d25767583e4e086f8309bfd1f502660a64fe7f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 09:30:08 -0400 Subject: Sanitize qnx4 fsync handling * have directory operations use mark_buffer_dirty_inode(), so that sync_mapping_buffers() would get those. * make qnx4_write_inode() honour its last argument. * get rid of insane copies of very ancient "walk the indirect blocks" in qnx4/fsync - they never matched the actual fs layout and, fortunately, never'd been called. Again, all this junk is not needed; ->fsync() should just do sync_mapping_buffers + sync_inode (and if we implement block allocation for qnx4, we'll need to use mark_buffer_dirty_inode() for extent blocks) Signed-off-by: Al Viro --- fs/qnx4/Makefile | 2 +- fs/qnx4/dir.c | 2 +- fs/qnx4/file.c | 2 +- fs/qnx4/fsync.c | 169 ------------------------------------------------ fs/qnx4/inode.c | 38 ++++------- fs/qnx4/namei.c | 4 +- include/linux/qnx4_fs.h | 2 - 7 files changed, 17 insertions(+), 202 deletions(-) delete mode 100644 fs/qnx4/fsync.c (limited to 'include') diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index 502d7fe98ba..e4d408cc547 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4.o -qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o +qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index ea9ffefb48a..ff6c1ba6c4e 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -84,7 +84,7 @@ const struct file_operations qnx4_dir_operations = { .read = generic_read_dir, .readdir = qnx4_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, }; const struct inode_operations qnx4_dir_inode_operations = diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 867f42b0203..e7033ea10e2 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -29,7 +29,7 @@ const struct file_operations qnx4_file_operations = #ifdef CONFIG_QNX4FS_RW .write = do_sync_write, .aio_write = generic_file_aio_write, - .fsync = qnx4_sync_file, + .fsync = simple_fsync, #endif }; diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c deleted file mode 100644 index aa3b19544be..00000000000 --- a/fs/qnx4/fsync.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * QNX4 file system, Linux implementation. - * - * Version : 0.1 - * - * Using parts of the xiafs filesystem. - * - * History : - * - * 24-03-1998 by Richard Frowijn : first release. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -/* - * The functions for qnx4 fs file synchronization. - */ - -#ifdef CONFIG_QNX4FS_RW - -static int sync_block(struct inode *inode, unsigned short *block, int wait) -{ - struct buffer_head *bh; - unsigned short tmp; - - if (!*block) - return 0; - tmp = *block; - bh = sb_find_get_block(inode->i_sb, *block); - if (!bh) - return 0; - if (*block != tmp) { - brelse(bh); - return 1; - } - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - brelse(bh); - return -1; - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - return 0; -} - -#ifdef WTF -static int sync_iblock(struct inode *inode, unsigned short *iblock, - struct buffer_head **bh, int wait) -{ - int rc; - unsigned short tmp; - - *bh = NULL; - tmp = *iblock; - if (!tmp) - return 0; - rc = sync_block(inode, iblock, wait); - if (rc) - return rc; - *bh = sb_bread(inode->i_sb, tmp); - if (tmp != *iblock) { - brelse(*bh); - *bh = NULL; - return 1; - } - if (!*bh) - return -1; - return 0; -} -#endif - -static int sync_direct(struct inode *inode, int wait) -{ - int i; - int rc, err = 0; - - for (i = 0; i < 7; i++) { - rc = sync_block(inode, - (unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - return err; -} - -#ifdef WTF -static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait) -{ - int i; - struct buffer_head *ind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, iblock, &ind_bh, wait); - if (rc || !ind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_block(inode, - ((unsigned short *) ind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(ind_bh); - return err; -} - -static int sync_dindirect(struct inode *inode, unsigned short *diblock, - int wait) -{ - int i; - struct buffer_head *dind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_indirect(inode, - ((unsigned short *) dind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} -#endif - -int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused) -{ - struct inode *inode = dentry->d_inode; - int wait, err = 0; - - (void) file; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return -EINVAL; - - lock_kernel(); - for (wait = 0; wait <= 1; wait++) { - err |= sync_direct(inode, wait); - } - err |= qnx4_sync_inode(inode); - unlock_kernel(); - return (err < 0) ? -EIO : 0; -} - -#endif diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 95c12fc613f..40712867b8a 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -34,31 +35,6 @@ static const struct super_operations qnx4_sops; #ifdef CONFIG_QNX4FS_RW -int qnx4_sync_inode(struct inode *inode) -{ - int err = 0; -# if 0 - struct buffer_head *bh; - - bh = qnx4_update_inode(inode); - if (bh && buffer_dirty(bh)) - { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { - printk ("IO error syncing qnx4 inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -1; - } - brelse (bh); - } else if (!bh) { - err = -1; - } -# endif - - return err; -} - static void qnx4_delete_inode(struct inode *inode) { QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); @@ -70,7 +46,7 @@ static void qnx4_delete_inode(struct inode *inode) unlock_kernel(); } -static int qnx4_write_inode(struct inode *inode, int unused) +static int qnx4_write_inode(struct inode *inode, int do_sync) { struct qnx4_inode_entry *raw_inode; int block, ino; @@ -107,6 +83,16 @@ static int qnx4_write_inode(struct inode *inode, int unused) raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); mark_buffer_dirty(bh); + if (do_sync) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk("qnx4: IO error syncing inode [%s:%08x]\n", + inode->i_sb->s_id, ino); + brelse(bh); + unlock_kernel(); + return -EIO; + } + } brelse(bh); unlock_kernel(); return 0; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 775eed3a408..123270c5376 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -187,7 +187,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); clear_nlink(inode); mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; @@ -232,7 +232,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); inode->i_ctime = dir->i_ctime; diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 787d19ea9f4..acbaec3524e 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -126,8 +126,6 @@ extern void qnx4_truncate(struct inode *inode); extern void qnx4_free_inode(struct inode *inode); extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); -extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int); -extern int qnx4_sync_inode(struct inode *inode); static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) { -- cgit v1.2.3-70-g09d2 From 964f5369667b342994fe3f384e9ba41d404ee796 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jun 2009 09:47:13 -0400 Subject: fs/qnx4: sanitize includes fs-internal parts of qnx4_fs.h taken to fs/qnx4/qnx4.h, includes adjusted, qnx4_fs.h doesn't need unifdef anymore. Signed-off-by: Al Viro --- fs/qnx4/bitmap.c | 7 +----- fs/qnx4/dir.c | 7 +----- fs/qnx4/file.c | 3 +-- fs/qnx4/inode.c | 11 +++------ fs/qnx4/namei.c | 9 +------- fs/qnx4/qnx4.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++ fs/qnx4/truncate.c | 6 +---- include/linux/Kbuild | 2 +- include/linux/qnx4_fs.h | 59 ------------------------------------------------- 9 files changed, 66 insertions(+), 95 deletions(-) create mode 100644 fs/qnx4/qnx4.h (limited to 'include') diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 8425cf6e962..e1cd061a25f 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -13,14 +13,9 @@ * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . */ -#include -#include -#include -#include -#include -#include #include #include +#include "qnx4.h" #if 0 int qnx4_new_block(struct super_block *sb) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index ff6c1ba6c4e..003c68f3238 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -11,14 +11,9 @@ * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. */ -#include -#include -#include -#include -#include #include #include - +#include "qnx4.h" static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) { diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index e7033ea10e2..09b170ac936 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -12,8 +12,7 @@ * 27-06-1998 by Frank Denis : file overwriting. */ -#include -#include +#include "qnx4.h" /* * We have mostly NULL's here: the current defaults are ok for diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 40712867b8a..681df5fcd16 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -13,20 +13,15 @@ */ #include -#include -#include -#include -#include -#include -#include #include +#include #include #include #include #include #include -#include -#include +#include +#include "qnx4.h" #define QNX4_VERSION 4 #define QNX4_BMNAME ".bitmap" diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 123270c5376..5972ed21493 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -12,16 +12,9 @@ * 04-07-1998 by Frank Denis : first step for rmdir/unlink. */ -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include "qnx4.h" /* diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h new file mode 100644 index 00000000000..9efc089454f --- /dev/null +++ b/fs/qnx4/qnx4.h @@ -0,0 +1,57 @@ +#include +#include + +#define QNX4_DEBUG 0 + +#if QNX4_DEBUG +#define QNX4DEBUG(X) printk X +#else +#define QNX4DEBUG(X) (void) 0 +#endif + +struct qnx4_sb_info { + struct buffer_head *sb_buf; /* superblock buffer */ + struct qnx4_super_block *sb; /* our superblock */ + unsigned int Version; /* may be useful */ + struct qnx4_inode_entry *BitMap; /* useful */ +}; + +struct qnx4_inode_info { + struct qnx4_inode_entry raw; + loff_t mmu_private; + struct inode vfs_inode; +}; + +extern struct inode *qnx4_iget(struct super_block *, unsigned long); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); +extern unsigned long qnx4_count_free_blocks(struct super_block *sb); +extern unsigned long qnx4_block_map(struct inode *inode, long iblock); + +extern struct buffer_head *qnx4_bread(struct inode *, int, int); + +extern const struct inode_operations qnx4_file_inode_operations; +extern const struct inode_operations qnx4_dir_inode_operations; +extern const struct file_operations qnx4_file_operations; +extern const struct file_operations qnx4_dir_operations; +extern int qnx4_is_free(struct super_block *sb, long block); +extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); +extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); +extern void qnx4_truncate(struct inode *inode); +extern void qnx4_free_inode(struct inode *inode); +extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); +extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); + +static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) +{ + return container_of(inode, struct qnx4_inode_info, vfs_inode); +} + +static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) +{ + return &qnx4_i(inode)->raw; +} diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 6437c1c3d1d..d94d9ee241f 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c @@ -10,12 +10,8 @@ * 30-06-1998 by Frank DENIS : ugly filler. */ -#include -#include -#include -#include #include -#include +#include "qnx4.h" #ifdef CONFIG_QNX4FS_RW diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f0eaa397ef..b3afd2219ad 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -135,6 +135,7 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h +header-y += qnx4_fs.h header-y += radeonfb.h header-y += raw.h header-y += resource.h @@ -308,7 +309,6 @@ unifdef-y += poll.h unifdef-y += ppp_defs.h unifdef-y += ppp-comp.h unifdef-y += ptrace.h -unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h unifdef-y += irqnr.h diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index acbaec3524e..8b9aee1a9ce 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -85,63 +85,4 @@ struct qnx4_super_block { struct qnx4_inode_entry AltBoot; }; -#ifdef __KERNEL__ - -#define QNX4_DEBUG 0 - -#if QNX4_DEBUG -#define QNX4DEBUG(X) printk X -#else -#define QNX4DEBUG(X) (void) 0 -#endif - -struct qnx4_sb_info { - struct buffer_head *sb_buf; /* superblock buffer */ - struct qnx4_super_block *sb; /* our superblock */ - unsigned int Version; /* may be useful */ - struct qnx4_inode_entry *BitMap; /* useful */ -}; - -struct qnx4_inode_info { - struct qnx4_inode_entry raw; - loff_t mmu_private; - struct inode vfs_inode; -}; - -extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); -extern unsigned long qnx4_count_free_blocks(struct super_block *sb); -extern unsigned long qnx4_block_map(struct inode *inode, long iblock); - -extern struct buffer_head *qnx4_bread(struct inode *, int, int); - -extern const struct inode_operations qnx4_file_inode_operations; -extern const struct inode_operations qnx4_dir_inode_operations; -extern const struct file_operations qnx4_file_operations; -extern const struct file_operations qnx4_dir_operations; -extern int qnx4_is_free(struct super_block *sb, long block); -extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); -extern void qnx4_truncate(struct inode *inode); -extern void qnx4_free_inode(struct inode *inode); -extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); -extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); - -static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) -{ - return container_of(inode, struct qnx4_inode_info, vfs_inode); -} - -static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) -{ - return &qnx4_i(inode)->raw; -} - -#endif /* __KERNEL__ */ - #endif -- cgit v1.2.3-70-g09d2 From fbe0efb869efde8d847ede3a925230ef88910086 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 9 Jun 2009 01:50:41 +1000 Subject: drm_calloc_large: check right size, check integer overflow, use GFP_ZERO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we would check size instead of size * nmemb, and so would never hit the vmalloc path. Also add integer overflow check as in kcalloc, and allocate GFP_ZERO pages instead of memset()ing them. Signed-off-by: Kristian Høgsberg Signed-off-by: Dave Airlie --- include/drm/drmP.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index afc21685230..1cc51a0812f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1573,18 +1573,14 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area) static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) { - u8 *addr; - - if (size <= PAGE_SIZE) + if (size * nmemb <= PAGE_SIZE) return kcalloc(nmemb, size, GFP_KERNEL); - addr = vmalloc(nmemb * size); - if (!addr) + if (size != 0 && nmemb > ULONG_MAX / size) return NULL; - memset(addr, 0, nmemb * size); - - return addr; + return __vmalloc(size * nmemb, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); } static __inline void drm_free_large(void *ptr) -- cgit v1.2.3-70-g09d2 From 2a71ebcd85bcc4d6607f577f23a491f796c30e82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jun 2009 15:53:10 +1000 Subject: drm/radeon: add rv740 drm support. This adds drm support for the RV740 family of chips to the r600 support code. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 38 ++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_drv.h | 1 + include/drm/drm_pciids.h | 5 +++++ 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index bc9d09dfa8e..aa4eee4b7f3 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -489,15 +489,16 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv) RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]); RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); - } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730)) { + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)) { RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); - DRM_INFO("Loading RV730 PFP Microcode\n"); + DRM_INFO("Loading RV730/RV740 PFP Microcode\n"); for (i = 0; i < R700_PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV730_pfp_microcode[i]); RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); - DRM_INFO("Loading RV730 CP Microcode\n"); + DRM_INFO("Loading RV730/RV740 CP Microcode\n"); for (i = 0; i < R700_PM4_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_ME_RAM_DATA, RV730_cp_microcode[i]); RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); @@ -1324,6 +1325,10 @@ static void r700_gfx_init(struct drm_device *dev, dev_priv->r700_sc_prim_fifo_size = 0xf9; dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; + if (dev_priv->r600_sx_max_export_pos_size > 16) { + dev_priv->r600_sx_max_export_pos_size -= 16; + dev_priv->r600_sx_max_export_smx_size += 16; + } break; case CHIP_RV710: dev_priv->r600_max_pipes = 2; @@ -1345,6 +1350,31 @@ static void r700_gfx_init(struct drm_device *dev, dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; break; + case CHIP_RV740: + dev_priv->r600_max_pipes = 4; + dev_priv->r600_max_tile_pipes = 4; + dev_priv->r600_max_simds = 8; + dev_priv->r600_max_backends = 4; + dev_priv->r600_max_gprs = 256; + dev_priv->r600_max_threads = 248; + dev_priv->r600_max_stack_entries = 512; + dev_priv->r600_max_hw_contexts = 8; + dev_priv->r600_max_gs_threads = 16 * 2; + dev_priv->r600_sx_max_export_size = 256; + dev_priv->r600_sx_max_export_pos_size = 32; + dev_priv->r600_sx_max_export_smx_size = 224; + dev_priv->r600_sq_num_cf_insts = 2; + + dev_priv->r700_sx_num_of_sets = 7; + dev_priv->r700_sc_prim_fifo_size = 0x100; + dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; + dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; + + if (dev_priv->r600_sx_max_export_pos_size > 16) { + dev_priv->r600_sx_max_export_pos_size -= 16; + dev_priv->r600_sx_max_export_smx_size += 16; + } + break; default: break; } @@ -1493,6 +1523,7 @@ static void r700_gfx_init(struct drm_device *dev, break; case CHIP_RV730: case CHIP_RV710: + case CHIP_RV740: default: sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4); break; @@ -1569,6 +1600,7 @@ static void r700_gfx_init(struct drm_device *dev, switch (dev_priv->flags & RADEON_FAMILY_MASK) { case CHIP_RV770: case CHIP_RV730: + case CHIP_RV740: gs_prim_buffer_depth = 384; break; case CHIP_RV710: diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 8071d965f14..e266e5f8dc2 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -146,6 +146,7 @@ enum radeon_family { CHIP_RV770, CHIP_RV730, CHIP_RV710, + CHIP_RV740, CHIP_LAST, }; diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 8b4c80cdd27..c7a1a8dc5ea 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -254,6 +254,11 @@ {0x1002, 0x940A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x940B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3-70-g09d2 From 715cbb05c935e8a4306a730d14a72d5af881523e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jun 2009 15:55:44 +1000 Subject: drm/radeon: add support for RV790. This adds the PCI IDs for the rv790 which are equiv to the rv770. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 4 ++-- include/drm/drm_pciids.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index aa4eee4b7f3..146f3570af8 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -478,13 +478,13 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv) if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)) { RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); - DRM_INFO("Loading RV770 PFP Microcode\n"); + DRM_INFO("Loading RV770/RV790 PFP Microcode\n"); for (i = 0; i < R700_PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV770_pfp_microcode[i]); RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); - DRM_INFO("Loading RV770 CP Microcode\n"); + DRM_INFO("Loading RV770/RV790 CP Microcode\n"); for (i = 0; i < R700_PM4_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]); RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index c7a1a8dc5ea..f8634ab53b8 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -273,6 +273,8 @@ {0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x946B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x947A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3-70-g09d2 From 249d6048ca98b5452105b0824abac1275661b8e3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 8 Apr 2009 17:11:16 +0200 Subject: drm: Split out the mm declarations in a separate header. Add atomic operations. this is a TTM preparation patch, it rearranges the mm and add operations needed to do mm operations in atomic context. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 165 +++++++++++++++++++++++++++++++++++++++-------- include/drm/drmP.h | 37 +---------- include/drm/drm_mm.h | 90 ++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 64 deletions(-) create mode 100644 include/drm/drm_mm.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 367c590ffbb..7819fd930a5 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -42,8 +42,11 @@ */ #include "drmP.h" +#include "drm_mm.h" #include +#define MM_UNUSED_TARGET 4 + unsigned long drm_mm_tail_space(struct drm_mm *mm) { struct list_head *tail_node; @@ -74,16 +77,62 @@ int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size) return 0; } +static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic) +{ + struct drm_mm_node *child; + + if (atomic) + child = kmalloc(sizeof(*child), GFP_ATOMIC); + else + child = kmalloc(sizeof(*child), GFP_KERNEL); + + if (unlikely(child == NULL)) { + spin_lock(&mm->unused_lock); + if (list_empty(&mm->unused_nodes)) + child = NULL; + else { + child = + list_entry(mm->unused_nodes.next, + struct drm_mm_node, fl_entry); + list_del(&child->fl_entry); + --mm->num_unused; + } + spin_unlock(&mm->unused_lock); + } + return child; +} + +int drm_mm_pre_get(struct drm_mm *mm) +{ + struct drm_mm_node *node; + + spin_lock(&mm->unused_lock); + while (mm->num_unused < MM_UNUSED_TARGET) { + spin_unlock(&mm->unused_lock); + node = kmalloc(sizeof(*node), GFP_KERNEL); + spin_lock(&mm->unused_lock); + + if (unlikely(node == NULL)) { + int ret = (mm->num_unused < 2) ? -ENOMEM : 0; + spin_unlock(&mm->unused_lock); + return ret; + } + ++mm->num_unused; + list_add_tail(&node->fl_entry, &mm->unused_nodes); + } + spin_unlock(&mm->unused_lock); + return 0; +} +EXPORT_SYMBOL(drm_mm_pre_get); static int drm_mm_create_tail_node(struct drm_mm *mm, - unsigned long start, - unsigned long size) + unsigned long start, + unsigned long size, int atomic) { struct drm_mm_node *child; - child = (struct drm_mm_node *) - drm_alloc(sizeof(*child), DRM_MEM_MM); - if (!child) + child = drm_mm_kmalloc(mm, atomic); + if (unlikely(child == NULL)) return -ENOMEM; child->free = 1; @@ -97,8 +146,7 @@ static int drm_mm_create_tail_node(struct drm_mm *mm, return 0; } - -int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size) +int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size, int atomic) { struct list_head *tail_node; struct drm_mm_node *entry; @@ -106,20 +154,21 @@ int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size) tail_node = mm->ml_entry.prev; entry = list_entry(tail_node, struct drm_mm_node, ml_entry); if (!entry->free) { - return drm_mm_create_tail_node(mm, entry->start + entry->size, size); + return drm_mm_create_tail_node(mm, entry->start + entry->size, + size, atomic); } entry->size += size; return 0; } static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent, - unsigned long size) + unsigned long size, + int atomic) { struct drm_mm_node *child; - child = (struct drm_mm_node *) - drm_alloc(sizeof(*child), DRM_MEM_MM); - if (!child) + child = drm_mm_kmalloc(parent->mm, atomic); + if (unlikely(child == NULL)) return NULL; INIT_LIST_HEAD(&child->fl_entry); @@ -151,8 +200,9 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, tmp = parent->start % alignment; if (tmp) { - align_splitoff = drm_mm_split_at_start(parent, alignment - tmp); - if (!align_splitoff) + align_splitoff = + drm_mm_split_at_start(parent, alignment - tmp, 0); + if (unlikely(align_splitoff == NULL)) return NULL; } @@ -161,7 +211,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, parent->free = 0; return parent; } else { - child = drm_mm_split_at_start(parent, size); + child = drm_mm_split_at_start(parent, size, 0); } if (align_splitoff) @@ -169,14 +219,49 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, return child; } + EXPORT_SYMBOL(drm_mm_get_block); +struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent, + unsigned long size, + unsigned alignment) +{ + + struct drm_mm_node *align_splitoff = NULL; + struct drm_mm_node *child; + unsigned tmp = 0; + + if (alignment) + tmp = parent->start % alignment; + + if (tmp) { + align_splitoff = + drm_mm_split_at_start(parent, alignment - tmp, 1); + if (unlikely(align_splitoff == NULL)) + return NULL; + } + + if (parent->size == size) { + list_del_init(&parent->fl_entry); + parent->free = 0; + return parent; + } else { + child = drm_mm_split_at_start(parent, size, 1); + } + + if (align_splitoff) + drm_mm_put_block(align_splitoff); + + return child; +} +EXPORT_SYMBOL(drm_mm_get_block_atomic); + /* * Put a block. Merge with the previous and / or next block if they are free. * Otherwise add to the free stack. */ -void drm_mm_put_block(struct drm_mm_node * cur) +void drm_mm_put_block(struct drm_mm_node *cur) { struct drm_mm *mm = cur->mm; @@ -188,21 +273,27 @@ void drm_mm_put_block(struct drm_mm_node * cur) int merged = 0; if (cur_head->prev != root_head) { - prev_node = list_entry(cur_head->prev, struct drm_mm_node, ml_entry); + prev_node = + list_entry(cur_head->prev, struct drm_mm_node, ml_entry); if (prev_node->free) { prev_node->size += cur->size; merged = 1; } } if (cur_head->next != root_head) { - next_node = list_entry(cur_head->next, struct drm_mm_node, ml_entry); + next_node = + list_entry(cur_head->next, struct drm_mm_node, ml_entry); if (next_node->free) { if (merged) { prev_node->size += next_node->size; list_del(&next_node->ml_entry); list_del(&next_node->fl_entry); - drm_free(next_node, sizeof(*next_node), - DRM_MEM_MM); + if (mm->num_unused < MM_UNUSED_TARGET) { + list_add(&next_node->fl_entry, + &mm->unused_nodes); + ++mm->num_unused; + } else + kfree(next_node); } else { next_node->size += cur->size; next_node->start = cur->start; @@ -215,14 +306,19 @@ void drm_mm_put_block(struct drm_mm_node * cur) list_add(&cur->fl_entry, &mm->fl_entry); } else { list_del(&cur->ml_entry); - drm_free(cur, sizeof(*cur), DRM_MEM_MM); + if (mm->num_unused < MM_UNUSED_TARGET) { + list_add(&cur->fl_entry, &mm->unused_nodes); + ++mm->num_unused; + } else + kfree(cur); } } + EXPORT_SYMBOL(drm_mm_put_block); -struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm, - unsigned long size, - unsigned alignment, int best_match) +struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, + unsigned long size, + unsigned alignment, int best_match) { struct list_head *list; const struct list_head *free_stack = &mm->fl_entry; @@ -247,7 +343,6 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm, wasted += alignment - tmp; } - if (entry->size >= size + wasted) { if (!best_match) return entry; @@ -260,6 +355,7 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm, return best; } +EXPORT_SYMBOL(drm_mm_search_free); int drm_mm_clean(struct drm_mm * mm) { @@ -267,14 +363,17 @@ int drm_mm_clean(struct drm_mm * mm) return (head->next->next == head); } -EXPORT_SYMBOL(drm_mm_search_free); +EXPORT_SYMBOL(drm_mm_clean); int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size) { INIT_LIST_HEAD(&mm->ml_entry); INIT_LIST_HEAD(&mm->fl_entry); + INIT_LIST_HEAD(&mm->unused_nodes); + mm->num_unused = 0; + spin_lock_init(&mm->unused_lock); - return drm_mm_create_tail_node(mm, start, size); + return drm_mm_create_tail_node(mm, start, size, 0); } EXPORT_SYMBOL(drm_mm_init); @@ -282,6 +381,7 @@ void drm_mm_takedown(struct drm_mm * mm) { struct list_head *bnode = mm->fl_entry.next; struct drm_mm_node *entry; + struct drm_mm_node *next; entry = list_entry(bnode, struct drm_mm_node, fl_entry); @@ -293,7 +393,16 @@ void drm_mm_takedown(struct drm_mm * mm) list_del(&entry->fl_entry); list_del(&entry->ml_entry); + kfree(entry); + + spin_lock(&mm->unused_lock); + list_for_each_entry_safe(entry, next, &mm->unused_nodes, fl_entry) { + list_del(&entry->fl_entry); + kfree(entry); + --mm->num_unused; + } + spin_unlock(&mm->unused_lock); - drm_free(entry, sizeof(*entry), DRM_MEM_MM); + BUG_ON(mm->num_unused != 0); } EXPORT_SYMBOL(drm_mm_takedown); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 1cc51a0812f..d4ddc22e46b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -86,6 +86,7 @@ struct drm_device; #include "drm_os_linux.h" #include "drm_hashtab.h" +#include "drm_mm.h" #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 @@ -553,26 +554,6 @@ struct drm_sigdata { }; -/* - * Generic memory manager structs - */ - -struct drm_mm_node { - struct list_head fl_entry; - struct list_head ml_entry; - int free; - unsigned long start; - unsigned long size; - struct drm_mm *mm; - void *private; -}; - -struct drm_mm { - struct list_head fl_entry; - struct list_head ml_entry; -}; - - /** * Kernel side of a mapping */ @@ -1436,22 +1417,6 @@ extern char *drm_get_connector_status_name(enum drm_connector_status status); extern int drm_sysfs_connector_add(struct drm_connector *connector); extern void drm_sysfs_connector_remove(struct drm_connector *connector); -/* - * Basic memory manager support (drm_mm.c) - */ -extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, - unsigned long size, - unsigned alignment); -extern void drm_mm_put_block(struct drm_mm_node * cur); -extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, unsigned long size, - unsigned alignment, int best_match); -extern int drm_mm_init(struct drm_mm *mm, unsigned long start, unsigned long size); -extern void drm_mm_takedown(struct drm_mm *mm); -extern int drm_mm_clean(struct drm_mm *mm); -extern unsigned long drm_mm_tail_space(struct drm_mm *mm); -extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size); -extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size); - /* Graphics Execution Manager library functions (drm_gem.c) */ int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h new file mode 100644 index 00000000000..5662f4278ef --- /dev/null +++ b/include/drm/drm_mm.h @@ -0,0 +1,90 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Authors: + * Thomas Hellstrom + */ + +#ifndef _DRM_MM_H_ +#define _DRM_MM_H_ + +/* + * Generic range manager structs + */ +#include + +struct drm_mm_node { + struct list_head fl_entry; + struct list_head ml_entry; + int free; + unsigned long start; + unsigned long size; + struct drm_mm *mm; + void *private; +}; + +struct drm_mm { + struct list_head fl_entry; + struct list_head ml_entry; + struct list_head unused_nodes; + int num_unused; + spinlock_t unused_lock; +}; + +/* + * Basic range manager support (drm_mm.c) + */ + +extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent, + unsigned long size, + unsigned alignment); +extern struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent, + unsigned long size, + unsigned alignment); +extern void drm_mm_put_block(struct drm_mm_node *cur); +extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, + unsigned long size, + unsigned alignment, + int best_match); +extern int drm_mm_init(struct drm_mm *mm, unsigned long start, + unsigned long size); +extern void drm_mm_takedown(struct drm_mm *mm); +extern int drm_mm_clean(struct drm_mm *mm); +extern unsigned long drm_mm_tail_space(struct drm_mm *mm); +extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, + unsigned long size); +extern int drm_mm_add_space_to_tail(struct drm_mm *mm, + unsigned long size, int atomic); +extern int drm_mm_pre_get(struct drm_mm *mm); + +static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) +{ + return block->mm; +} + +#endif -- cgit v1.2.3-70-g09d2 From 3c24475c1e4e8d10e50df161d8c4f1d382997a7c Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 8 Apr 2009 18:34:28 +0200 Subject: drm: include kernel list header file in hashtab header Signed-off-by: Dave Airlie --- include/drm/drm_hashtab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h index cd2b189e1be..0af087a4d3b 100644 --- a/include/drm/drm_hashtab.h +++ b/include/drm/drm_hashtab.h @@ -35,6 +35,8 @@ #ifndef DRM_HASHTAB_H #define DRM_HASHTAB_H +#include + #define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) struct drm_hash_item { -- cgit v1.2.3-70-g09d2 From ca371c0d7e23d0d0afae65fc83a0e91cf7399573 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 12 Jun 2009 10:33:53 +0300 Subject: memcg: fix page_cgroup fatal error in FLATMEM Now, SLAB is configured in very early stage and it can be used in init routine now. But replacing alloc_bootmem() in FLAT/DISCONTIGMEM's page_cgroup() initialization breaks the allocation, now. (Works well in SPARSEMEM case...it supports MEMORY_HOTPLUG and size of page_cgroup is in reasonable size (< 1 << MAX_ORDER.) This patch revive FLATMEM+memory cgroup by using alloc_bootmem. In future, We stop to support FLATMEM (if no users) or rewrite codes for flatmem completely.But this will adds more messy codes and overheads. Reported-by: Li Zefan Tested-by: Li Zefan Tested-by: Ingo Molnar Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Pekka Enberg --- include/linux/page_cgroup.h | 18 +++++++++++++++++- init/main.c | 5 +++++ mm/page_cgroup.c | 29 ++++++++++------------------- 3 files changed, 32 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 7339c7bf733..13f126c89ae 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -18,7 +18,19 @@ struct page_cgroup { }; void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); -void __init page_cgroup_init(void); + +#ifdef CONFIG_SPARSEMEM +static inline void __init page_cgroup_init_flatmem(void) +{ +} +extern void __init page_cgroup_init(void); +#else +void __init page_cgroup_init_flatmem(void); +static inline void __init page_cgroup_init(void) +{ +} +#endif + struct page_cgroup *lookup_page_cgroup(struct page *page); enum { @@ -87,6 +99,10 @@ static inline void page_cgroup_init(void) { } +static inline void __init page_cgroup_init_flatmem(void) +{ +} + #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP diff --git a/init/main.c b/init/main.c index 5616661eac0..b3e8f14c568 100644 --- a/init/main.c +++ b/init/main.c @@ -539,6 +539,11 @@ void __init __weak thread_info_cache_init(void) */ static void __init mm_init(void) { + /* + * page_cgroup requires countinous pages as memmap + * and it's bigger than MAX_ORDER unless SPARSEMEM. + */ + page_cgroup_init_flatmem(); mem_init(); kmem_cache_init(); vmalloc_init(); diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 3dd4a909a1d..11a8a10a390 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -47,8 +47,6 @@ static int __init alloc_node_page_cgroup(int nid) struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; - struct page *page; - unsigned int order; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; @@ -57,13 +55,11 @@ static int __init alloc_node_page_cgroup(int nid) return 0; table_size = sizeof(struct page_cgroup) * nr_pages; - order = get_order(table_size); - page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) - page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) + + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!base) return -ENOMEM; - base = page_address(page); for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); @@ -73,7 +69,7 @@ static int __init alloc_node_page_cgroup(int nid) return 0; } -void __init page_cgroup_init(void) +void __init page_cgroup_init_flatmem(void) { int nid, fail; @@ -117,16 +113,11 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) if (!section->page_cgroup) { nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, - GFP_KERNEL | __GFP_NOWARN, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), - table_size, - PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); - } + VM_BUG_ON(!slab_is_available()); + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); + if (!base) + base = vmalloc_node(table_size, nid); } else { /* * We don't have to allocate page_cgroup again, but -- cgit v1.2.3-70-g09d2 From 9a71af2c3627b379b7c31917a7f6ee0d29bc559b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:53 -0600 Subject: module_param: invbool should take a 'bool', not an 'int' It takes an 'int' for historical reasons, and there are only two users: simply switch it over to bool. The other user (uvesafb.c) will get a (harmless-on-x86) warning until the next patch is applied. Cc: Brad Douglas Cc: Michal Januszewski Signed-off-by: Rusty Russell --- drivers/video/aty/aty128fb.c | 2 +- include/linux/moduleparam.h | 2 +- kernel/params.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 35e8eb02b9e..e4e4d433b00 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0; static int default_lcd_on __devinitdata = 1; #ifdef CONFIG_MTRR -static int mtrr = 1; +static bool mtrr = true; #endif #ifdef CONFIG_PMAC_BACKLIGHT diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index a4f0b931846..9bbca8e8c19 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -192,7 +192,7 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp); extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); -#define param_check_invbool(name, p) __param_check(name, p, int) +#define param_check_invbool(name, p) __param_check(name, p, bool) /* Comma-separated array: *nump is set to number they actually specified. */ #define module_param_array_named(name, array, type, nump, perm) \ diff --git a/kernel/params.c b/kernel/params.c index de273ec85bd..023abbf5f89 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -272,13 +272,13 @@ int param_set_invbool(const char *val, struct kernel_param *kp) dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) - *(int *)kp->arg = !boolval; + *(bool *)kp->arg = !boolval; return ret; } int param_get_invbool(char *buffer, struct kernel_param *kp) { - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y'); + return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); } /* We break the rule and mangle the string. */ -- cgit v1.2.3-70-g09d2 From 45fcc70c0b6ee0c508e1fdb5fef735c3546803f4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:56 -0600 Subject: module_param: split perm field into flags and perm Impact: cleanup Rather than hack KPARAM_KMALLOCED into the perm field, separate it out. Since the perm field was 32 bits and only needs 16, we don't add bloat. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 8 ++++++-- kernel/params.c | 9 +++------ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9bbca8e8c19..009a5f76876 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -36,9 +36,13 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); /* Returns length written or -errno. Buffer is 4k (ie. be short!) */ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); +/* Flag bits for kernel_param.flags */ +#define KPARAM_KMALLOCED 1 + struct kernel_param { const char *name; - unsigned int perm; + u16 perm; + u16 flags; param_set_fn set; param_get_fn get; union { @@ -88,7 +92,7 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, set, get, { arg } } + = { __param_str_##name, perm, 0, set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) diff --git a/kernel/params.c b/kernel/params.c index 023abbf5f89..b4660dc13db 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -24,9 +24,6 @@ #include #include -/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */ -#define KPARAM_KMALLOCED 0x80000000 - #if 0 #define DEBUGP printk #else @@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - if (kp->perm & KPARAM_KMALLOCED) + if (kp->flags & KPARAM_KMALLOCED) kfree(*(char **)kp->arg); /* This is a hack. We can't need to strdup in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - kp->perm |= KPARAM_KMALLOCED; + kp->flags |= KPARAM_KMALLOCED; *(char **)kp->arg = kstrdup(val, GFP_KERNEL); if (!kp->arg) return -ENOMEM; @@ -591,7 +588,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) unsigned int i; for (i = 0; i < num; i++) - if (params[i].perm & KPARAM_KMALLOCED) + if (params[i].flags & KPARAM_KMALLOCED) kfree(*(char **)params[i].arg); } -- cgit v1.2.3-70-g09d2 From d2c123c27db841c6c11a63de9c144823d2b1ba76 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:56 -0600 Subject: module_param: add __same_type convenience wrapper for __builtin_types_compatible_p Impact: new API __builtin_types_compatible_p() is a little awkward to use: it takes two types rather than types or variables, and it's just damn long. (typeof(type) == type, so this works on types as well as vars). Signed-off-by: Rusty Russell --- include/linux/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 37bcb50a4d7..04fb5135b4e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -261,6 +261,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __section(S) __attribute__ ((__section__(#S))) #endif +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3-70-g09d2 From fddd520122953550ec2c8b60e7ca0d0f0d115d97 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:57 -0600 Subject: module_param: allow 'bool' module_params to be bool, not just int. Impact: API cleanup For historical reasons, 'bool' parameters must be an int, not a bool. But there are around 600 users, so a conversion seems like useless churn. So we use __same_type() to distinguish, and handle both cases. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 32 +++++++++++++++++++++++--------- kernel/params.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 009a5f76876..6547c3cdbc4 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -38,6 +38,7 @@ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); /* Flag bits for kernel_param.flags */ #define KPARAM_KMALLOCED 1 +#define KPARAM_ISBOOL 2 struct kernel_param { const char *name; @@ -83,7 +84,7 @@ struct kparam_array parameters. perm sets the visibility in sysfs: 000 means it's not there, read bits mean it's readable, write bits mean it's writable. */ -#define __module_param_call(prefix, name, set, get, arg, perm) \ +#define __module_param_call(prefix, name, set, get, arg, isbool, perm) \ /* Default value instead of permissions? */ \ static int __param_perm_check_##name __attribute__((unused)) = \ BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ @@ -92,10 +93,13 @@ struct kparam_array static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, perm, 0, set, get, { arg } } + = { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0, \ + set, get, { arg } } #define module_param_call(name, set, get, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm) + __module_param_call(MODULE_PARAM_PREFIX, \ + name, set, get, arg, \ + __same_type(*(arg), bool), perm) /* Helper functions: type is byte, short, ushort, int, uint, long, ulong, charp, bool or invbool, or XXX if you define param_get_XXX, @@ -124,15 +128,16 @@ struct kparam_array #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, param_set_##type, param_get_##type, \ - &var, perm) + &var, __same_type(var, bool), perm) #endif /* !MODULE */ /* Actually copy string: maxlen param is usually sizeof(string). */ #define module_param_string(name, string, len, perm) \ static const struct kparam_string __param_string_##name \ = { len, string }; \ - module_param_call(name, param_set_copystring, param_get_string, \ - .str = &__param_string_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_set_copystring, param_get_string, \ + .str = &__param_string_##name, 0, perm); \ __MODULE_PARM_TYPE(name, "string") /* Called on module insert or kernel boot */ @@ -190,9 +195,16 @@ extern int param_set_charp(const char *val, struct kernel_param *kp); extern int param_get_charp(char *buffer, struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) +/* For historical reasons "bool" parameters can be (unsigned) "int". */ extern int param_set_bool(const char *val, struct kernel_param *kp); extern int param_get_bool(char *buffer, struct kernel_param *kp); -#define param_check_bool(name, p) __param_check(name, p, int) +#define param_check_bool(name, p) \ + static inline void __check_##name(void) \ + { \ + BUILD_BUG_ON(!__same_type(*(p), bool) && \ + !__same_type(*(p), unsigned int) && \ + !__same_type(*(p), int)); \ + } extern int param_set_invbool(const char *val, struct kernel_param *kp); extern int param_get_invbool(char *buffer, struct kernel_param *kp); @@ -203,8 +215,10 @@ extern int param_get_invbool(char *buffer, struct kernel_param *kp); static const struct kparam_array __param_arr_##name \ = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\ sizeof(array[0]), array }; \ - module_param_call(name, param_array_set, param_array_get, \ - .arr = &__param_arr_##name, perm); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + param_array_set, param_array_get, \ + .arr = &__param_arr_##name, \ + __same_type(array[0], bool), perm); \ __MODULE_PARM_TYPE(name, "array of " #type) #define module_param_array(name, type, nump, perm) \ diff --git a/kernel/params.c b/kernel/params.c index b4660dc13db..7f6912ced2b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -238,35 +238,54 @@ int param_get_charp(char *buffer, struct kernel_param *kp) return sprintf(buffer, "%s", *((char **)kp->arg)); } +/* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, struct kernel_param *kp) { + bool v; + /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ switch (val[0]) { case 'y': case 'Y': case '1': - *(int *)kp->arg = 1; - return 0; + v = true; + break; case 'n': case 'N': case '0': - *(int *)kp->arg = 0; - return 0; + v = false; + break; + default: + return -EINVAL; } - return -EINVAL; + + if (kp->flags & KPARAM_ISBOOL) + *(bool *)kp->arg = v; + else + *(int *)kp->arg = v; + return 0; } int param_get_bool(char *buffer, struct kernel_param *kp) { + bool val; + if (kp->flags & KPARAM_ISBOOL) + val = *(bool *)kp->arg; + else + val = *(int *)kp->arg; + /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N'); + return sprintf(buffer, "%c", val ? 'Y' : 'N'); } +/* This one must be bool. */ int param_set_invbool(const char *val, struct kernel_param *kp) { - int boolval, ret; + int ret; + bool boolval; struct kernel_param dummy; dummy.arg = &boolval; + dummy.flags = KPARAM_ISBOOL; ret = param_set_bool(val, &dummy); if (ret == 0) *(bool *)kp->arg = !boolval; -- cgit v1.2.3-70-g09d2 From ad6561dffa17f17bb68d7207d422c26c381c4313 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:47:03 -0600 Subject: module: trim exception table on init free. It's theoretically possible that there are exception table entries which point into the (freed) init text of modules. These could cause future problems if other modules get loaded into that memory and cause an exception as we'd see the wrong fixup. The only case I know of is kvm-intel.ko (when CONFIG_CC_OPTIMIZE_FOR_SIZE=n). Amerigo fixed this long-standing FIXME in the x86 version, but this patch is more general. This implements trim_init_extable(); most archs are simple since they use the standard lib/extable.c sort code. Alpha and IA64 use relative addresses in their fixups, so thier trimming is a slight variation. Sparc32 is unique; it doesn't seem to define ARCH_HAS_SORT_EXTABLE, yet it defines its own sort_extable() which overrides the one in lib. It doesn't sort, so we have to mark deleted entries instead of actually trimming them. Inspired-by: Amerigo Wang Signed-off-by: Rusty Russell Cc: linux-alpha@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-ia64@vger.kernel.org --- arch/alpha/mm/extable.c | 21 +++++++++++++++++++++ arch/ia64/mm/extable.c | 26 ++++++++++++++++++++++++++ arch/sparc/include/asm/uaccess_32.h | 3 +++ arch/sparc/mm/extable.c | 29 +++++++++++++++++++++++++++++ include/linux/module.h | 1 + kernel/module.c | 1 + lib/extable.c | 21 ++++++++++++++++++++- 7 files changed, 101 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c index 62dc379d301..813c9b63c0e 100644 --- a/arch/alpha/mm/extable.c +++ b/arch/alpha/mm/extable.c @@ -48,6 +48,27 @@ void sort_extable(struct exception_table_entry *start, cmp_ex, swap_ex); } +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + const struct exception_table_entry * search_extable(const struct exception_table_entry *first, const struct exception_table_entry *last, diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 71c50dd8f87..e95d5ad9285 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -53,6 +53,32 @@ void sort_extable (struct exception_table_entry *start, cmp_ex, swap_ex); } +static inline unsigned long ex_to_addr(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + const struct exception_table_entry * search_extable (const struct exception_table_entry *first, const struct exception_table_entry *last, diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 47d5619d43f..8303ac48103 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -17,6 +17,9 @@ #ifndef __ASSEMBLY__ +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + /* Sparc is not segmented, however we need to be able to fool access_ok() * when doing system calls from kernel mode legitimately. * diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c index 16cc28935e3..a61c349448e 100644 --- a/arch/sparc/mm/extable.c +++ b/arch/sparc/mm/extable.c @@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start, * word 3: last insn address + 4 bytes * word 4: fixup code address * + * Deleted entries are encoded as: + * word 1: unused + * word 2: -1 + * * See asm/uaccess.h for more details. */ @@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start, continue; } + /* A deleted entry; see trim_init_extable */ + if (walk->fixup == -1) + continue; + if (walk->insn == value) return walk; } @@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start, return NULL; } +#ifdef CONFIG_MODULES +/* We could memmove them around; easier to mark the trimmed ones. */ +void trim_init_extable(struct module *m) +{ + unsigned int i; + bool range; + + for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { + range = m->extable[i].fixup == 0; + + if (within_module_init(m->extable[i].insn, m)) { + m->extable[i].fixup = -1; + if (range) + m->extable[i+1].fixup = -1; + } + if (range) + i++; + } +} +#endif /* CONFIG_MODULES */ + /* Special extable search, which handles ranges. Returns fixup */ unsigned long search_extables_range(unsigned long addr, unsigned long *g2) { diff --git a/include/linux/module.h b/include/linux/module.h index a8f2c0aa4c3..a7bc6e7b43a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -77,6 +77,7 @@ search_extable(const struct exception_table_entry *first, void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish); void sort_main_extable(void); +void trim_init_extable(struct module *m); #ifdef MODULE #define MODULE_GENERIC_TABLE(gtype,name) \ diff --git a/kernel/module.c b/kernel/module.c index 35f7de00bf0..e4ab36ce767 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2455,6 +2455,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); + trim_init_extable(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/lib/extable.c b/lib/extable.c index 179c0874559..4cac81ec225 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -39,7 +39,26 @@ void sort_extable(struct exception_table_entry *start, sort(start, finish - start, sizeof(struct exception_table_entry), cmp_ex, NULL); } -#endif + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(m->extable[m->num_exentries-1].insn, m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ +#endif /* !ARCH_HAS_SORT_EXTABLE */ #ifndef ARCH_HAS_SEARCH_EXTABLE /* -- cgit v1.2.3-70-g09d2 From f1a3c979059b2033d0b1cc4f9ee5c90bf92b5f94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 17:56:09 +0200 Subject: perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too is_software_counter() was missing the new HW_CACHE category. ( This could have caused some counter scheduling artifacts with mixed sw and hw counters and counter groups. ) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6e133954e2e..7c4f32f6ae1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -621,7 +621,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi, static inline int is_software_counter(struct perf_counter *counter) { return (counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE); + (counter->attr.type != PERF_TYPE_HARDWARE) && + (counter->attr.type != PERF_TYPE_HW_CACHE); } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); -- cgit v1.2.3-70-g09d2 From 974802eaa1afdc87e00821df7020a2b3c6fee623 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jun 2009 12:46:55 +0200 Subject: perf_counter: Add forward/backward attribute ABI compatibility Provide for means of extending the perf_counter_attr in a 'natural' way. We allow growing the structure by appending fields at the end by specifying the full structure size inside it. When a new kernel sees a smaller (old) structure, it will 0 pad the tail. When an old kernel sees a larger (new) structure, it will verify the tail consists of 0s, otherwise fail. If we fail due to a size-mismatch, we return -E2BIG and write the kernel's native attribe size back into the provided structure. Furthermore, add some attribute verification, so that we'll fail counter creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in the __reserved fields). (This ABI detail is introduced while keeping the existing syscall ABI.) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 19 ++++++++-- include/linux/syscalls.h | 2 +- kernel/perf_counter.c | 89 ++++++++++++++++++++++++++++++++++++++++++-- tools/perf/perf.h | 5 ++- 4 files changed, 105 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7c4f32f6ae1..1b3118a1023 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -120,6 +120,8 @@ enum perf_counter_sample_format { PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, + + PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ }; /* @@ -131,17 +133,26 @@ enum perf_counter_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, + + PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ }; +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_attr { + /* * Major type: hardware/software/tracepoint/etc. */ __u32 type; - __u32 __reserved_1; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; /* * Type specific configuration information. @@ -168,12 +179,12 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ - __reserved_2 : 53; + __reserved_1 : 53; __u32 wakeup_events; /* wakeup every n events */ - __u32 __reserved_3; + __u32 __reserved_2; - __u64 __reserved_4; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6c84ad8bd7..418d90f5eff 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( - const struct perf_counter_attr __user *attr_uptr, + struct perf_counter_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 663bbe01505..29b685f551a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, case PERF_TYPE_TRACEPOINT: pmu = tp_perf_counter_init(counter); break; + + default: + break; } done: err = 0; @@ -3610,6 +3613,85 @@ done: return counter; } +static int perf_copy_attr(struct perf_counter_attr __user *uattr, + struct perf_counter_attr *attr) +{ + int ret; + u32 size; + + if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) + return -EFAULT; + + /* + * zero the full structure, so that a short copy will be nice. + */ + memset(attr, 0, sizeof(*attr)); + + ret = get_user(size, &uattr->size); + if (ret) + return ret; + + if (size > PAGE_SIZE) /* silly large */ + goto err_size; + + if (!size) /* abi compat */ + size = PERF_ATTR_SIZE_VER0; + + if (size < PERF_ATTR_SIZE_VER0) + goto err_size; + + /* + * If we're handed a bigger struct than we know of, + * ensure all the unknown bits are 0. + */ + if (size > sizeof(*attr)) { + unsigned long val; + unsigned long __user *addr; + unsigned long __user *end; + + addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), + sizeof(unsigned long)); + end = PTR_ALIGN((void __user *)uattr + size, + sizeof(unsigned long)); + + for (; addr < end; addr += sizeof(unsigned long)) { + ret = get_user(val, addr); + if (ret) + return ret; + if (val) + goto err_size; + } + } + + ret = copy_from_user(attr, uattr, size); + if (ret) + return -EFAULT; + + /* + * If the type exists, the corresponding creation will verify + * the attr->config. + */ + if (attr->type >= PERF_TYPE_MAX) + return -EINVAL; + + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) + return -EINVAL; + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) + return -EINVAL; + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) + return -EINVAL; + +out: + return ret; + +err_size: + put_user(sizeof(*attr), &uattr->size); + ret = -E2BIG; + goto out; +} + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -3619,7 +3701,7 @@ done: * @group_fd: group leader counter fd */ SYSCALL_DEFINE5(perf_counter_open, - const struct perf_counter_attr __user *, attr_uptr, + struct perf_counter_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; @@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open, if (flags) return -EINVAL; - if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) - return -EFAULT; + ret = perf_copy_attr(attr_uptr, &attr); + if (ret) + return ret; if (!attr.exclude_kernel) { if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index af0a5046d74..87a1aca4a42 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void) _min1 < _min2 ? _min1 : _min2; }) static inline int -sys_perf_counter_open(struct perf_counter_attr *attr_uptr, +sys_perf_counter_open(struct perf_counter_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu, + attr->size = sizeof(*attr); + return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd, flags); } -- cgit v1.2.3-70-g09d2 From 20f77f5654042cf484d8964b618faf9d620f639b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:16:33 -0600 Subject: virtio: fix obsolete documentation on probe function Signed-off-by: Rusty Russell --- include/linux/virtio.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 06005fa9e98..9410394bbf9 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -99,8 +99,7 @@ void unregister_virtio_device(struct virtio_device *dev); * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this device. * @feature_table_size: number of entries in the feature table array. - * @probe: the function to call when a device is found. Returns a token for - * remove, or PTR_ERR(). + * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function when a device is removed. * @config_changed: optional function to call when the device configuration * changes; may be called in interrupt context. -- cgit v1.2.3-70-g09d2 From 9499f5e7ed5224c40706f0cec6542a9916bc7606 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:16:35 -0600 Subject: virtio: add names to virtqueue struct, mapping from devices to queues. Add a linked list of all virtqueues for a virtio device: this helps for debugging and is also needed for upcoming interface change. Also, add a "name" field for clearer debug messages. Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 2 +- drivers/char/hw_random/virtio-rng.c | 2 +- drivers/char/virtio_console.c | 4 ++-- drivers/lguest/lguest_device.c | 5 +++-- drivers/net/virtio_net.c | 6 +++--- drivers/s390/kvm/kvm_virtio.c | 7 ++++--- drivers/virtio/virtio.c | 2 ++ drivers/virtio/virtio_balloon.c | 4 ++-- drivers/virtio/virtio_pci.c | 5 +++-- drivers/virtio/virtio_ring.c | 27 ++++++++++++++++++++------- include/linux/virtio.h | 12 ++++++++---- include/linux/virtio_config.h | 6 ++++-- include/linux/virtio_ring.h | 3 ++- net/9p/trans_virtio.c | 2 +- 14 files changed, 56 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c0facaa55cf..db55a50d9f6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev) sg_init_table(vblk->sg, vblk->sg_elems); /* We expect one virtqueue, for output. */ - vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); + vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests"); if (IS_ERR(vblk->vq)) { err = PTR_ERR(vblk->vq); goto out_free_vblk; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 86e83f88313..2aeafcea95f 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -94,7 +94,7 @@ static int virtrng_probe(struct virtio_device *vdev) int err; /* We expect a single virtqueue. */ - vq = vdev->config->find_vq(vdev, 0, random_recv_done); + vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input"); if (IS_ERR(vq)) return PTR_ERR(vq); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index ff6f5a4b58f..58684e4a081 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -202,13 +202,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); + in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input"); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; } - out_vq = vdev->config->find_vq(vdev, 1, NULL); + out_vq = vdev->config->find_vq(vdev, 1, NULL, "output"); if (IS_ERR(out_vq)) { err = PTR_ERR(out_vq); goto free_in_vq; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index df44d962626..4babed899d5 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq); * function. */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq)) + void (*callback)(struct virtqueue *vq), + const char *name) { struct lguest_device *ldev = to_lgdev(vdev); struct lguest_vq_info *lvq; @@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* OK, tell virtio_ring.c to set up a virtqueue now we know its size * and we've got a pointer to its pages. */ vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, - vdev, lvq->pages, lg_notify, callback); + vdev, lvq->pages, lg_notify, callback, name); if (!vq) { err = -ENOMEM; goto unmap; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4d1d47953fc..be3b734ff5a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -906,20 +906,20 @@ static int virtnet_probe(struct virtio_device *vdev) vi->mergeable_rx_bufs = true; /* We expect two virtqueues, receive then send. */ - vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); + vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input"); if (IS_ERR(vi->rvq)) { err = PTR_ERR(vi->rvq); goto free; } - vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done); + vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output"); if (IS_ERR(vi->svq)) { err = PTR_ERR(vi->svq); goto free_recv; } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vdev->config->find_vq(vdev, 2, NULL); + vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control"); if (IS_ERR(vi->cvq)) { err = PTR_ERR(vi->svq); goto free_send; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index cbc8566fab7..ba8995fbf04 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -173,8 +173,9 @@ static void kvm_notify(struct virtqueue *vq) * this device and sets it up. */ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, - unsigned index, - void (*callback)(struct virtqueue *vq)) + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name) { struct kvm_device *kdev = to_kvmdev(vdev); struct kvm_vqconfig *config; @@ -194,7 +195,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, vdev, (void *) config->address, - kvm_notify, callback); + kvm_notify, callback, name); if (!vq) { err = -ENOMEM; goto unmap; diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 6b681036486..3f52c767dfe 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -186,6 +186,8 @@ int register_virtio_device(struct virtio_device *dev) /* Acknowledge that we've seen the device. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + INIT_LIST_HEAD(&dev->vqs); + /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9c76a061a04..0fa73b4d18b 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -218,13 +218,13 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; /* We expect two virtqueues. */ - vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); + vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate"); if (IS_ERR(vb->inflate_vq)) { err = PTR_ERR(vb->inflate_vq); goto out_free_vb; } - vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); + vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate"); if (IS_ERR(vb->deflate_vq)) { err = PTR_ERR(vb->deflate_vq); goto out_del_inflate_vq; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 330aacbdec1..be4047abd5b 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -208,7 +208,8 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) /* the config->find_vq() implementation */ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq)) + void (*callback)(struct virtqueue *vq), + const char *name) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; @@ -247,7 +248,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, vp_notify, callback); + vdev, info->queue, vp_notify, callback, name); if (!vq) { err = -ENOMEM; goto out_activate_queue; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5c52369ab9b..579fa693d5d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -23,21 +23,30 @@ #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ -#define BAD_RING(_vq, fmt...) \ - do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0) +#define BAD_RING(_vq, fmt, args...) \ + do { \ + dev_err(&(_vq)->vq.vdev->dev, \ + "%s:"fmt, (_vq)->vq.name, ##args); \ + BUG(); \ + } while (0) /* Caller is supposed to guarantee no reentry. */ #define START_USE(_vq) \ do { \ if ((_vq)->in_use) \ - panic("in_use = %i\n", (_vq)->in_use); \ + panic("%s:in_use = %i\n", \ + (_vq)->vq.name, (_vq)->in_use); \ (_vq)->in_use = __LINE__; \ mb(); \ - } while(0) + } while (0) #define END_USE(_vq) \ do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) #else -#define BAD_RING(_vq, fmt...) \ - do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0) +#define BAD_RING(_vq, fmt, args...) \ + do { \ + dev_err(&_vq->vq.vdev->dev, \ + "%s:"fmt, (_vq)->vq.name, ##args); \ + (_vq)->broken = true; \ + } while (0) #define START_USE(vq) #define END_USE(vq) #endif @@ -284,7 +293,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *)) + void (*callback)(struct virtqueue *), + const char *name) { struct vring_virtqueue *vq; unsigned int i; @@ -303,10 +313,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.vq_ops = &vring_vq_ops; + vq->vq.name = name; vq->notify = notify; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; + list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; #endif @@ -327,6 +339,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue); void vring_del_virtqueue(struct virtqueue *vq) { + list_del(&vq->list); kfree(to_vvq(vq)); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9410394bbf9..4fca4f5440b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,14 +10,17 @@ /** * virtqueue - a queue to register buffers for sending or receiving. + * @list: the chain of virtqueues for this device * @callback: the function to call when buffers are consumed (can be NULL). + * @name: the name of this virtqueue (mainly for debugging) * @vdev: the virtio device this queue was created for. * @vq_ops: the operations for this virtqueue (see below). * @priv: a pointer for the virtqueue implementation to use. */ -struct virtqueue -{ +struct virtqueue { + struct list_head list; void (*callback)(struct virtqueue *vq); + const char *name; struct virtio_device *vdev; struct virtqueue_ops *vq_ops; void *priv; @@ -76,15 +79,16 @@ struct virtqueue_ops { * @dev: underlying device. * @id: the device type identification (used to match it with a driver). * @config: the configuration ops for this device. + * @vqs: the list of virtqueues for this device. * @features: the features supported by both driver and device. * @priv: private pointer for the driver's use. */ -struct virtio_device -{ +struct virtio_device { int index; struct device dev; struct virtio_device_id id; struct virtio_config_ops *config; + struct list_head vqs; /* Note that this is a Linux set_bit-style bitmap. */ unsigned long features[1]; void *priv; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index bf8ec283b23..9fae274751e 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,7 +55,8 @@ * @find_vq: find a virtqueue and instantiate it. * vdev: the virtio_device * index: the 0-based virtqueue number in case there's more than one. - * callback: the virqtueue callback + * callback: the virtqueue callback + * name: the virtqueue name (mainly for debugging) * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). * @del_vq: free a virtqueue found by find_vq(). * @get_features: get the array of feature bits for this device. @@ -77,7 +78,8 @@ struct virtio_config_ops void (*reset)(struct virtio_device *vdev); struct virtqueue *(*find_vq)(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *)); + void (*callback)(struct virtqueue *), + const char *name); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 71e03722fb5..166c519689d 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -119,7 +119,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *vq), - void (*callback)(struct virtqueue *vq)); + void (*callback)(struct virtqueue *vq), + const char *name); void vring_del_virtqueue(struct virtqueue *vq); /* Filter out transport-specific feature bits. */ void vring_transport_features(struct virtio_device *vdev); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index bb8579a141a..ab8791f9aba 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vdev = vdev; /* We expect one virtqueue, for requests. */ - chan->vq = vdev->config->find_vq(vdev, 0, req_done); + chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests"); if (IS_ERR(chan->vq)) { err = PTR_ERR(chan->vq); goto out_free_vq; -- cgit v1.2.3-70-g09d2 From d2a7ddda9ffb1c8961abff6714b0f1eb925c120f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 12 Jun 2009 22:16:36 -0600 Subject: virtio: find_vqs/del_vqs virtio operations This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations, and updates all drivers. This is needed for MSI support, because MSI needs to know the total number of vectors upfront. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell (+ lguest/9p compile fixes) --- drivers/block/virtio_blk.c | 6 ++--- drivers/char/hw_random/virtio-rng.c | 6 ++--- drivers/char/virtio_console.c | 26 +++++++++----------- drivers/lguest/lguest_device.c | 36 ++++++++++++++++++++++++++-- drivers/net/virtio_net.c | 45 ++++++++++++++--------------------- drivers/s390/kvm/kvm_virtio.c | 36 ++++++++++++++++++++++++++-- drivers/virtio/virtio_balloon.c | 27 +++++++++------------ drivers/virtio/virtio_pci.c | 37 +++++++++++++++++++++++------ include/linux/virtio_config.h | 47 ++++++++++++++++++++++++++++--------- net/9p/trans_virtio.c | 6 ++--- 10 files changed, 183 insertions(+), 89 deletions(-) (limited to 'include') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index db55a50d9f6..07d8e595e51 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev) sg_init_table(vblk->sg, vblk->sg_elems); /* We expect one virtqueue, for output. */ - vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests"); + vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); if (IS_ERR(vblk->vq)) { err = PTR_ERR(vblk->vq); goto out_free_vblk; @@ -388,7 +388,7 @@ out_put_disk: out_mempool: mempool_destroy(vblk->pool); out_free_vq: - vdev->config->del_vq(vblk->vq); + vdev->config->del_vqs(vdev); out_free_vblk: kfree(vblk); out: @@ -409,7 +409,7 @@ static void virtblk_remove(struct virtio_device *vdev) blk_cleanup_queue(vblk->disk->queue); put_disk(vblk->disk); mempool_destroy(vblk->pool); - vdev->config->del_vq(vblk->vq); + vdev->config->del_vqs(vdev); kfree(vblk); } diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 2aeafcea95f..f2041fede82 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -94,13 +94,13 @@ static int virtrng_probe(struct virtio_device *vdev) int err; /* We expect a single virtqueue. */ - vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input"); + vq = virtio_find_single_vq(vdev, random_recv_done, "input"); if (IS_ERR(vq)) return PTR_ERR(vq); err = hwrng_register(&virtio_hwrng); if (err) { - vdev->config->del_vq(vq); + vdev->config->del_vqs(vdev); return err; } @@ -112,7 +112,7 @@ static void virtrng_remove(struct virtio_device *vdev) { vdev->config->reset(vdev); hwrng_unregister(&virtio_hwrng); - vdev->config->del_vq(vq); + vdev->config->del_vqs(vdev); } static struct virtio_device_id id_table[] = { diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 58684e4a081..c74dacfa679 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -188,6 +188,9 @@ static void hvc_handle_input(struct virtqueue *vq) * Finally we put our input buffer in the input queue, ready to receive. */ static int __devinit virtcons_probe(struct virtio_device *dev) { + vq_callback_t *callbacks[] = { hvc_handle_input, NULL}; + const char *names[] = { "input", "output" }; + struct virtqueue *vqs[2]; int err; vdev = dev; @@ -199,20 +202,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev) goto fail; } - /* Find the input queue. */ + /* Find the queues. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input"); - if (IS_ERR(in_vq)) { - err = PTR_ERR(in_vq); + err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + if (err) goto free; - } - out_vq = vdev->config->find_vq(vdev, 1, NULL, "output"); - if (IS_ERR(out_vq)) { - err = PTR_ERR(out_vq); - goto free_in_vq; - } + in_vq = vqs[0]; + out_vq = vqs[1]; /* Start using the new console output. */ virtio_cons.get_chars = get_chars; @@ -233,17 +231,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev) hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); if (IS_ERR(hvc)) { err = PTR_ERR(hvc); - goto free_out_vq; + goto free_vqs; } /* Register the input buffer the first time. */ add_inbuf(); return 0; -free_out_vq: - vdev->config->del_vq(out_vq); -free_in_vq: - vdev->config->del_vq(in_vq); +free_vqs: + vdev->config->del_vqs(vdev); free: kfree(inbuf); fail: diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 4babed899d5..e082cdac88b 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -313,6 +313,38 @@ static void lg_del_vq(struct virtqueue *vq) kfree(lvq); } +static void lg_del_vqs(struct virtio_device *vdev) +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + lg_del_vq(vq); +} + +static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct lguest_device *ldev = to_lgdev(vdev); + int i; + + /* We must have this many virtqueues. */ + if (nvqs > ldev->desc->num_vq) + return -ENOENT; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) + goto error; + } + return 0; + +error: + lg_del_vqs(vdev); + return PTR_ERR(vqs[i]); +} + /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, @@ -322,8 +354,8 @@ static struct virtio_config_ops lguest_config_ops = { .get_status = lg_get_status, .set_status = lg_set_status, .reset = lg_reset, - .find_vq = lg_find_vq, - .del_vq = lg_del_vq, + .find_vqs = lg_find_vqs, + .del_vqs = lg_del_vqs, }; /* The root device for the lguest virtio devices. This makes them appear as diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index be3b734ff5a..7fa620ddeb2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -845,6 +845,10 @@ static int virtnet_probe(struct virtio_device *vdev) int err; struct net_device *dev; struct virtnet_info *vi; + struct virtqueue *vqs[3]; + vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; + const char *names[] = { "input", "output", "control" }; + int nvqs; /* Allocate ourselves a network device with room for our info */ dev = alloc_etherdev(sizeof(struct virtnet_info)); @@ -905,25 +909,19 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; - /* We expect two virtqueues, receive then send. */ - vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input"); - if (IS_ERR(vi->rvq)) { - err = PTR_ERR(vi->rvq); + /* We expect two virtqueues, receive then send, + * and optionally control. */ + nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; + + err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); + if (err) goto free; - } - vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output"); - if (IS_ERR(vi->svq)) { - err = PTR_ERR(vi->svq); - goto free_recv; - } + vi->rvq = vqs[0]; + vi->svq = vqs[1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control"); - if (IS_ERR(vi->cvq)) { - err = PTR_ERR(vi->svq); - goto free_send; - } + vi->cvq = vqs[2]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) dev->features |= NETIF_F_HW_VLAN_FILTER; @@ -941,7 +939,7 @@ static int virtnet_probe(struct virtio_device *vdev) err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); - goto free_ctrl; + goto free_vqs; } /* Last of all, set up some receive buffers. */ @@ -962,13 +960,8 @@ static int virtnet_probe(struct virtio_device *vdev) unregister: unregister_netdev(dev); -free_ctrl: - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) - vdev->config->del_vq(vi->cvq); -free_send: - vdev->config->del_vq(vi->svq); -free_recv: - vdev->config->del_vq(vi->rvq); +free_vqs: + vdev->config->del_vqs(vdev); free: free_netdev(dev); return err; @@ -994,12 +987,10 @@ static void virtnet_remove(struct virtio_device *vdev) BUG_ON(vi->num != 0); - vdev->config->del_vq(vi->svq); - vdev->config->del_vq(vi->rvq); - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) - vdev->config->del_vq(vi->cvq); unregister_netdev(vi->dev); + vdev->config->del_vqs(vi->vdev); + while (vi->pages) __free_pages(get_a_page(vi, GFP_KERNEL), 0); diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index ba8995fbf04..e38e5d306fa 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -227,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq) KVM_S390_VIRTIO_RING_ALIGN)); } +static void kvm_del_vqs(struct virtio_device *vdev) +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + kvm_del_vq(vq); +} + +static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct kvm_device *kdev = to_kvmdev(vdev); + int i; + + /* We must have this many virtqueues. */ + if (nvqs > kdev->desc->num_vq) + return -ENOENT; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) + goto error; + } + return 0; + +error: + kvm_del_vqs(vdev); + return PTR_ERR(vqs[i]); +} + /* * The config ops structure as defined by virtio config */ @@ -238,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_status = kvm_get_status, .set_status = kvm_set_status, .reset = kvm_reset, - .find_vq = kvm_find_vq, - .del_vq = kvm_del_vq, + .find_vqs = kvm_find_vqs, + .del_vqs = kvm_del_vqs, }; /* diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0fa73b4d18b..26b27826479 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -204,6 +204,9 @@ static int balloon(void *_vballoon) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; + struct virtqueue *vqs[2]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack }; + const char *names[] = { "inflate", "deflate" }; int err; vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); @@ -218,22 +221,17 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; /* We expect two virtqueues. */ - vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate"); - if (IS_ERR(vb->inflate_vq)) { - err = PTR_ERR(vb->inflate_vq); + err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + if (err) goto out_free_vb; - } - vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate"); - if (IS_ERR(vb->deflate_vq)) { - err = PTR_ERR(vb->deflate_vq); - goto out_del_inflate_vq; - } + vb->inflate_vq = vqs[0]; + vb->deflate_vq = vqs[1]; vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { err = PTR_ERR(vb->thread); - goto out_del_deflate_vq; + goto out_del_vqs; } vb->tell_host_first @@ -241,10 +239,8 @@ static int virtballoon_probe(struct virtio_device *vdev) return 0; -out_del_deflate_vq: - vdev->config->del_vq(vb->deflate_vq); -out_del_inflate_vq: - vdev->config->del_vq(vb->inflate_vq); +out_del_vqs: + vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); out: @@ -264,8 +260,7 @@ static void virtballoon_remove(struct virtio_device *vdev) /* Now we reset the device so we can clean up the queues. */ vdev->config->reset(vdev); - vdev->config->del_vq(vb->deflate_vq); - vdev->config->del_vq(vb->inflate_vq); + vdev->config->del_vqs(vdev); kfree(vb); } diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index be4047abd5b..027f13fbe49 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -276,11 +276,7 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long flags, size; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); + unsigned long size; vring_del_virtqueue(vq); @@ -293,14 +289,41 @@ static void vp_del_vq(struct virtqueue *vq) kfree(info); } +static void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vp_del_vq(vq); +} + +static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int i; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) + goto error; + } + return 0; + +error: + vp_del_vqs(vdev); + return PTR_ERR(vqs[i]); +} + static struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, .get_status = vp_get_status, .set_status = vp_set_status, .reset = vp_reset, - .find_vq = vp_find_vq, - .del_vq = vp_del_vq, + .find_vqs = vp_find_vqs, + .del_vqs = vp_del_vqs, .get_features = vp_get_features, .finalize_features = vp_finalize_features, }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 9fae274751e..4cd290c06a8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -29,6 +29,7 @@ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 #ifdef __KERNEL__ +#include #include /** @@ -49,16 +50,26 @@ * @set_status: write the status byte * vdev: the virtio_device * status: the new status byte + * @request_vqs: request the specified number of virtqueues + * vdev: the virtio_device + * max_vqs: the max number of virtqueues we want + * If supplied, must call before any virtqueues are instantiated. + * To modify the max number of virtqueues after request_vqs has been + * called, call free_vqs and then request_vqs with a new value. + * @free_vqs: cleanup resources allocated by request_vqs + * vdev: the virtio_device + * If supplied, must call after all virtqueues have been deleted. * @reset: reset the device * vdev: the virtio device * After this, status and feature negotiation must be done again - * @find_vq: find a virtqueue and instantiate it. + * @find_vqs: find virtqueues and instantiate them. * vdev: the virtio_device - * index: the 0-based virtqueue number in case there's more than one. - * callback: the virtqueue callback - * name: the virtqueue name (mainly for debugging) - * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). - * @del_vq: free a virtqueue found by find_vq(). + * nvqs: the number of virtqueues to find + * vqs: on success, includes new virtqueues + * callbacks: array of callbacks, for each virtqueue + * names: array of virtqueue names (mainly for debugging) + * Returns 0 on success or error status + * @del_vqs: free virtqueues found by find_vqs(). * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). @@ -67,6 +78,7 @@ * This gives the final feature bits for the device: it can change * the dev->feature bits if it wants. */ +typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -76,11 +88,11 @@ struct virtio_config_ops u8 (*get_status)(struct virtio_device *vdev); void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); - struct virtqueue *(*find_vq)(struct virtio_device *vdev, - unsigned index, - void (*callback)(struct virtqueue *), - const char *name); - void (*del_vq)(struct virtqueue *vq); + int (*find_vqs)(struct virtio_device *, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); + void (*del_vqs)(struct virtio_device *); u32 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); }; @@ -128,5 +140,18 @@ static inline int virtio_config_buf(struct virtio_device *vdev, vdev->config->get(vdev, offset, buf, len); return 0; } + +static inline +struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, + vq_callback_t *c, const char *n) +{ + vq_callback_t *callbacks[] = { c }; + const char *names[] = { n }; + struct virtqueue *vq; + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + if (err < 0) + return ERR_PTR(err); + return vq; +} #endif /* __KERNEL__ */ #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index ab8791f9aba..a49484e67e1 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vdev = vdev; /* We expect one virtqueue, for requests. */ - chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests"); + chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); if (IS_ERR(chan->vq)) { err = PTR_ERR(chan->vq); goto out_free_vq; @@ -261,7 +261,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) return 0; out_free_vq: - vdev->config->del_vq(chan->vq); + vdev->config->del_vqs(vdev); fail: mutex_lock(&virtio_9p_lock); chan_index--; @@ -332,7 +332,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) BUG_ON(chan->inuse); if (chan->initialized) { - vdev->config->del_vq(chan->vq); + vdev->config->del_vqs(vdev); chan->initialized = false; } } -- cgit v1.2.3-70-g09d2 From 82af8ce84ed65d2fb6d8c017d3f2bbbf161061fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 May 2009 13:55:41 +0300 Subject: virtio_pci: optional MSI-X support This implements optional MSI-X support in virtio_pci. MSI-X is used whenever the host supports at least 2 MSI-X vectors: 1 for configuration changes and 1 for virtqueues. Per-virtqueue vectors are allocated if enough vectors available. Signed-off-by: Michael S. Tsirkin Acked-by: Anthony Liguori Signed-off-by: Rusty Russell (+ whitespace, style) --- drivers/virtio/virtio_pci.c | 228 ++++++++++++++++++++++++++++++++++++++++---- include/linux/virtio_pci.h | 10 +- 2 files changed, 218 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 951e673e50a..193c8f0e5cc 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -42,6 +42,26 @@ struct virtio_pci_device /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + /* Name strings for interrupts. This size should be enough, + * and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated */ + unsigned msix_used_vectors; +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, }; struct virtio_pci_vq_info @@ -60,6 +80,9 @@ struct virtio_pci_vq_info /* the list node for the virtqueues list */ struct list_head node; + + /* MSI-X vector (or none) */ + unsigned vector; }; /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ @@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; u8 *ptr = buf; int i; @@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; const u8 *ptr = buf; int i; @@ -221,7 +246,122 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -/* the config->find_vq() implementation */ +static void vp_free_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; + } + + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(vp_dev->msix_entries[i].vector, vp_dev); + vp_dev->msix_used_vectors = 0; + + if (vp_dev->msix_enabled) { + /* Disable the vector used for configuration */ + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Flush the write out to device */ + ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + + vp_dev->msix_enabled = 0; + pci_disable_msix(vp_dev->pci_dev); + } +} + +static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, + int *options, int noptions) +{ + int i; + for (i = 0; i < noptions; ++i) + if (!pci_enable_msix(dev, entries, options[i])) + return options[i]; + return -EBUSY; +} + +static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; + /* We want at most one vector per queue and one for config changes. + * Fallback to separate vectors for config and a shared for queues. + * Finally fall back to regular interrupts. */ + int options[] = { max_vqs + 1, 2 }; + int nvectors = max(options[0], options[1]); + + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, + GFP_KERNEL); + if (!vp_dev->msix_entries) + goto error_entries; + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error_names; + + for (i = 0; i < nvectors; ++i) + vp_dev->msix_entries[i].entry = i; + + err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, + options, ARRAY_SIZE(options)); + if (err < 0) { + /* Can't allocate enough MSI-X vectors, use regular interrupt */ + vp_dev->msix_vectors = 0; + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, name, vp_dev); + if (err) + goto error_irq; + vp_dev->intx_enabled = 1; + } else { + vp_dev->msix_vectors = err; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error_irq; + ++vp_dev->msix_used_vectors; + + iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error_irq; + } + } + + if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error_irq; + ++vp_dev->msix_used_vectors; + } + return 0; +error_irq: + vp_free_vectors(vdev); + kfree(vp_dev->msix_names); +error_names: + kfree(vp_dev->msix_entries); +error_entries: + return err; +} + static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name) @@ -230,7 +370,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, struct virtio_pci_vq_info *info; struct virtqueue *vq; unsigned long flags, size; - u16 num; + u16 num, vector; int err; /* Select the queue we're interested in */ @@ -249,6 +389,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; + info->vector = VIRTIO_MSI_NO_VECTOR; size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); @@ -272,12 +413,43 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, vq->priv = info; info->vq = vq; + /* allocate per-vq vector if available and necessary */ + if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) { + vector = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(vp_dev->msix_entries[vector].vector, + vring_interrupt, 0, + vp_dev->msix_names[vector], vq); + if (err) + goto out_request_irq; + info->vector = vector; + ++vp_dev->msix_used_vectors; + } else + vector = VP_MSIX_VQ_VECTOR; + + if (callback && vp_dev->msix_enabled) { + iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + if (vector == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_assign; + } + } + spin_lock_irqsave(&vp_dev->lock, flags); list_add(&info->node, &vp_dev->virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); return vq; +out_assign: + if (info->vector != VIRTIO_MSI_NO_VECTOR) { + free_irq(vp_dev->msix_entries[info->vector].vector, vq); + --vp_dev->msix_used_vectors; + } +out_request_irq: + vring_del_virtqueue(vq); out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); free_pages_exact(info->queue, size); @@ -286,17 +458,27 @@ out_info: return ERR_PTR(err); } -/* the config->del_vq() implementation */ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; unsigned long size; + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + + if (info->vector != VIRTIO_MSI_NO_VECTOR) + free_irq(vp_dev->msix_entries[info->vector].vector, vq); + + if (vp_dev->msix_enabled) { + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + } + vring_del_virtqueue(vq); /* Select and deactivate the queue */ - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); @@ -304,30 +486,46 @@ static void vp_del_vq(struct virtqueue *vq) kfree(info); } +/* the config->del_vqs() implementation */ static void vp_del_vqs(struct virtio_device *vdev) { struct virtqueue *vq, *n; list_for_each_entry_safe(vq, n, &vdev->vqs, list) vp_del_vq(vq); + + vp_free_vectors(vdev); } +/* the config->find_vqs() implementation */ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char *names[]) { - int i; + int vectors = 0; + int i, err; + + /* How many vectors would we like? */ + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++vectors; + + err = vp_request_vectors(vdev, vectors); + if (err) + goto error_request; for (i = 0; i < nvqs; ++i) { vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); if (IS_ERR(vqs[i])) - goto error; + goto error_find; } return 0; -error: +error_find: vp_del_vqs(vdev); + +error_request: return PTR_ERR(vqs[i]); } @@ -349,7 +547,7 @@ static void virtio_pci_release_dev(struct device *_d) struct virtio_pci_device *vp_dev = to_vp_device(dev); struct pci_dev *pci_dev = vp_dev->pci_dev; - free_irq(pci_dev->irq, vp_dev); + vp_del_vqs(dev); pci_set_drvdata(pci_dev, NULL); pci_iounmap(pci_dev, vp_dev->ioaddr); pci_release_regions(pci_dev); @@ -408,21 +606,13 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; vp_dev->vdev.id.device = pci_dev->subsystem_device; - /* register a handler for the queue with the PCI device's interrupt */ - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, - dev_name(&vp_dev->vdev.dev), vp_dev); - if (err) - goto out_set_drvdata; - /* finally register the virtio device */ err = register_virtio_device(&vp_dev->vdev); if (err) - goto out_req_irq; + goto out_set_drvdata; return 0; -out_req_irq: - free_irq(pci_dev->irq, vp_dev); out_set_drvdata: pci_set_drvdata(pci_dev, NULL); pci_iounmap(pci_dev, vp_dev->ioaddr); diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index cd0fd5d181a..9a3d7c48c62 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -47,9 +47,17 @@ /* The bit of the ISR which indicates a device configuration change. */ #define VIRTIO_PCI_ISR_CONFIG 0x2 +/* MSI-X registers: only enabled if MSI-X is enabled. */ +/* A 16-bit vector for configuration changes. */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 +/* A 16-bit vector for selected queue notifications. */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + /* The remaining space is defined by each driver as the per-driver * configuration space */ -#define VIRTIO_PCI_CONFIG 20 +#define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -- cgit v1.2.3-70-g09d2 From ee006b353f1ca8c9a8470b72b462beb011d62e32 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 11 May 2009 18:11:44 +0100 Subject: virtio: teach virtio_has_feature() about transport features Drivers don't add transport features to their table, so we shouldn't check these with virtio_check_driver_offered_feature(). We could perhaps add an ->offered_feature() virtio_config_op, but that perhaps that would be overkill for a consitency check like this. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4cd290c06a8..99f514575f6 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -113,7 +113,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, if (__builtin_constant_p(fbit)) BUILD_BUG_ON(fbit >= 32); - virtio_check_driver_offered_feature(vdev, fbit); + if (fbit < VIRTIO_TRANSPORT_F_START) + virtio_check_driver_offered_feature(vdev, fbit); + return test_bit(fbit, vdev->features); } -- cgit v1.2.3-70-g09d2 From 9fa29b9df32ba4db055f3977933cd0c1b8fe67cd Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Mon, 11 May 2009 18:11:45 +0100 Subject: virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC) Add a new feature flag for indirect ring entries. These are ring entries which point to a table of buffer descriptors. The idea here is to increase the ring capacity by allowing a larger effective ring size whereby the ring size dictates the number of requests that may be outstanding, rather than the size of those requests. This should be most effective in the case of block I/O where we can potentially benefit by concurrently dispatching a large number of large requests. Even in the simple case of single segment block requests, this results in a threefold increase in ring capacity. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 75 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/virtio_ring.h | 5 +++ 2 files changed, 78 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 579fa693d5d..a882f260651 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -61,6 +61,9 @@ struct vring_virtqueue /* Other side has made a mess, don't try any more. */ bool broken; + /* Host supports indirect buffers */ + bool indirect; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -85,6 +88,55 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +/* Set up an indirect table of descriptors and add it to the queue. */ +static int vring_add_indirect(struct vring_virtqueue *vq, + struct scatterlist sg[], + unsigned int out, + unsigned int in) +{ + struct vring_desc *desc; + unsigned head; + int i; + + desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC); + if (!desc) + return vq->vring.num; + + /* Transfer entries from the sg list into the indirect page */ + for (i = 0; i < out; i++) { + desc[i].flags = VRING_DESC_F_NEXT; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + sg++; + } + for (; i < (out + in); i++) { + desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + sg++; + } + + /* Last one doesn't continue. */ + desc[i-1].flags &= ~VRING_DESC_F_NEXT; + desc[i-1].next = 0; + + /* We're about to use a buffer */ + vq->num_free--; + + /* Use a single buffer which doesn't continue */ + head = vq->free_head; + vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; + vq->vring.desc[head].addr = virt_to_phys(desc); + vq->vring.desc[head].len = i * sizeof(struct vring_desc); + + /* Update free pointer */ + vq->free_head = vq->vring.desc[head].next; + + return head; +} + static int vring_add_buf(struct virtqueue *_vq, struct scatterlist sg[], unsigned int out, @@ -94,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i, avail, head, uninitialized_var(prev); + START_USE(vq); + BUG_ON(data == NULL); + + /* If the host supports indirect descriptor tables, and we have multiple + * buffers, then go indirect. FIXME: tune this threshold */ + if (vq->indirect && (out + in) > 1 && vq->num_free) { + head = vring_add_indirect(vq, sg, out, in); + if (head != vq->vring.num) + goto add_head; + } + BUG_ON(out + in > vq->vring.num); BUG_ON(out + in == 0); - START_USE(vq); - if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); @@ -136,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq, /* Update free pointer */ vq->free_head = i; +add_head: /* Set token. */ vq->data[head] = data; @@ -179,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) /* Put back on free list: find end */ i = head; + + /* Free the indirect table */ + if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) + kfree(phys_to_virt(vq->vring.desc[i].addr)); + while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { i = vq->vring.desc[i].next; vq->num_free++; @@ -323,6 +390,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->in_use = false; #endif + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + /* No callback? Tell other side not to bother us. */ if (!callback) vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; @@ -351,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev) for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { switch (i) { + case VIRTIO_RING_F_INDIRECT_DESC: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 166c519689d..693e0ec5afa 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -14,6 +14,8 @@ #define VRING_DESC_F_NEXT 1 /* This marks a buffer as write-only (otherwise read-only). */ #define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest @@ -24,6 +26,9 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { -- cgit v1.2.3-70-g09d2 From a32a8813d0173163ba44d8f9556e0d89fdc4fb46 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:02 -0600 Subject: lguest: improve interrupt handling, speed up stream networking lguest never checked for pending interrupts when enabling interrupts, and things still worked. However, it makes a significant difference to TCP performance, so it's time we fixed it by introducing a pending_irq flag and checking it on irq_restore and irq_enable. These two routines are now too big to patch into the 8/10 bytes patch space, so we drop that code. Note: The high latency on interrupt delivery had a very curious effect: once everything else was optimized, networking without GSO was faster than networking with GSO, since more interrupts were sent and hence a greater chance of one getting through to the Guest! Note2: (Almost) Closing the same loophole for iret doesn't have any measurable effect, so I'm leaving that patch for the moment. Before: 1GB tcpblast Guest->Host: 30.7 seconds 1GB tcpblast Guest->Host (no GSO): 76.0 seconds After: 1GB tcpblast Guest->Host: 6.8 seconds 1GB tcpblast Guest->Host (no GSO): 27.8 seconds Signed-off-by: Rusty Russell --- arch/x86/include/asm/lguest_hcall.h | 1 + arch/x86/lguest/boot.c | 21 +++++++++++++++------ arch/x86/lguest/i386_head.S | 2 -- drivers/lguest/core.c | 7 ++++--- drivers/lguest/hypercalls.c | 4 ++++ drivers/lguest/interrupts_and_traps.c | 16 +++++++++++++--- drivers/lguest/lg.h | 4 ++-- include/linux/lguest.h | 4 ++++ 8 files changed, 43 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index faae1996487..f9a9f781124 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -17,6 +17,7 @@ #define LHCALL_LOAD_TLS 16 #define LHCALL_NOTIFY 17 #define LHCALL_LOAD_GDT_ENTRY 18 +#define LHCALL_SEND_INTERRUPTS 19 #define LGUEST_TRAP_ENTRY 0x1F diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 2392a7a171c..37b8c1d3e02 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -205,6 +205,12 @@ PV_CALLEE_SAVE_REGS_THUNK(save_fl); static void restore_fl(unsigned long flags) { lguest_data.irq_enabled = flags; + mb(); + /* Null hcall forces interrupt delivery now, if irq_pending is + * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags + * enables interrupts. */ + if (flags & lguest_data.irq_pending) + kvm_hypercall0(LHCALL_SEND_INTERRUPTS); } PV_CALLEE_SAVE_REGS_THUNK(restore_fl); @@ -219,6 +225,11 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable); static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; + mb(); + /* Null hcall forces interrupt delivery now. */ + if (lguest_data.irq_pending) + kvm_hypercall0(LHCALL_SEND_INTERRUPTS); + } PV_CALLEE_SAVE_REGS_THUNK(irq_enable); @@ -972,10 +983,10 @@ static void lguest_restart(char *reason) * * Our current solution is to allow the paravirt back end to optionally patch * over the indirect calls to replace them with something more efficient. We - * patch the four most commonly called functions: disable interrupts, enable - * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 - * bytes to patch into: the Guest versions of these operations are small enough - * that we can fit comfortably. + * patch two of the simplest of the most commonly called functions: disable + * interrupts and save interrupts. We usually have 6 or 10 bytes to patch + * into: the Guest versions of these operations are small enough that we can + * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, * and these are in i386_head.S. */ @@ -986,8 +997,6 @@ static const struct lguest_insns const char *start, *end; } lguest_insns[] = { [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, - [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, - [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, }; diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index f7954198947..3e0c5545d59 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -46,8 +46,6 @@ ENTRY(lguest_entry) .globl lgstart_##name; .globl lgend_##name LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) /*:*/ diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 8ca1def5b14..03fbc88c002 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -189,6 +189,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* We stop running once the Guest is dead. */ while (!cpu->lg->dead) { unsigned int irq; + bool more; /* First we run any hypercalls the Guest wants done. */ if (cpu->hcall) @@ -213,9 +214,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* Check if there are any interrupts which can be delivered now: * if so, this sets up the hander to be executed when we next * run the Guest. */ - irq = interrupt_pending(cpu); + irq = interrupt_pending(cpu, &more); if (irq < LGUEST_IRQS) - try_deliver_interrupt(cpu, irq); + try_deliver_interrupt(cpu, irq, more); /* All long-lived kernel loops need to check with this horrible * thing called the freezer. If the Host is trying to suspend, @@ -233,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) set_current_state(TASK_INTERRUPTIBLE); /* Just before we sleep, make sure nothing snuck in * which we should be doing. */ - if (interrupt_pending(cpu) < LGUEST_IRQS + if (interrupt_pending(cpu, &more) < LGUEST_IRQS || cpu->break_out) set_current_state(TASK_RUNNING); else diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 54d66f05fef..f252b71ae79 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) /* This call does nothing, except by breaking out of the Guest * it makes us process all the asynchronous hypercalls. */ break; + case LHCALL_SEND_INTERRUPTS: + /* This call does nothing too, but by breaking out of the Guest + * it makes us process any pending interrupts. */ + break; case LHCALL_LGUEST_INIT: /* You can't get here unless you're already initialized. Don't * do that. */ diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index a8c966fee1e..5a10754b479 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -131,7 +131,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, * interrupt_pending() returns the first pending interrupt which isn't blocked * by the Guest. It is called before every entry to the Guest, and just before * we go to sleep when the Guest has halted itself. */ -unsigned int interrupt_pending(struct lg_cpu *cpu) +unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) { unsigned int irq; DECLARE_BITMAP(blk, LGUEST_IRQS); @@ -149,13 +149,14 @@ unsigned int interrupt_pending(struct lg_cpu *cpu) /* Find the first interrupt. */ irq = find_first_bit(blk, LGUEST_IRQS); + *more = find_next_bit(blk, LGUEST_IRQS, irq+1); return irq; } /* This actually diverts the Guest to running an interrupt handler, once an * interrupt has been identified by interrupt_pending(). */ -void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) +void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) { struct desc_struct *idt; @@ -178,8 +179,12 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) u32 irq_enabled; if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) irq_enabled = 0; - if (!irq_enabled) + if (!irq_enabled) { + /* Make sure they know an IRQ is pending. */ + put_user(X86_EFLAGS_IF, + &cpu->lg->lguest_data->irq_pending); return; + } } /* Look at the IDT entry the Guest gave us for this interrupt. The @@ -202,6 +207,11 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq) * here is a compromise which means at least it gets updated every * timer interrupt. */ write_timestamp(cpu); + + /* If there are no other interrupts we want to deliver, clear + * the pending flag. */ + if (!more) + put_user(0, &cpu->lg->lguest_data->irq_pending); } /*:*/ diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 6743cf147d9..573896533ac 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -139,8 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) /* interrupts_and_traps.c: */ -unsigned int interrupt_pending(struct lg_cpu *cpu); -void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq); +unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more); +void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more); bool deliver_trap(struct lg_cpu *cpu, unsigned int num); void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i, u32 low, u32 hi); diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 175e63f4a8c..7bc1440fc47 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -30,6 +30,10 @@ struct lguest_data /* Wallclock time set by the Host. */ struct timespec time; + /* Interrupt pending set by the Host. The Guest should do a hypercall + * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */ + int irq_pending; + /* Async hypercall ring. Instead of directly making hypercalls, we can * place them in here for processing the next time the Host wants. * This batching can be quite efficient. */ -- cgit v1.2.3-70-g09d2 From df60aeef4f4fe0645d9a195a7689005520422de5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:09 -0600 Subject: lguest: use eventfds for device notification Currently, when a Guest wants to perform I/O it calls LHCALL_NOTIFY with an address: the main Launcher process returns with this address, and figures out what device to run. A far nicer model is to let processes bind an eventfd to an address: if we find one, we simply signal the eventfd. Signed-off-by: Rusty Russell Cc: Davide Libenzi --- drivers/lguest/Kconfig | 2 +- drivers/lguest/core.c | 8 ++-- drivers/lguest/lg.h | 13 ++++++ drivers/lguest/lguest_user.c | 98 ++++++++++++++++++++++++++++++++++++++++- include/linux/lguest_launcher.h | 1 + 5 files changed, 116 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index 8f63845db83..0aaa0597a62 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST tristate "Linux hypervisor example code" - depends on X86_32 && EXPERIMENTAL && FUTEX + depends on X86_32 && EXPERIMENTAL && EVENTFD select HVC_DRIVER ---help--- This is a very simple module which allows you to run diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index d0298dc45d9..508569c9571 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -198,9 +198,11 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* It's possible the Guest did a NOTIFY hypercall to the * Launcher, in which case we return from the read() now. */ if (cpu->pending_notify) { - if (put_user(cpu->pending_notify, user)) - return -EFAULT; - return sizeof(cpu->pending_notify); + if (!send_notify_to_eventfd(cpu)) { + if (put_user(cpu->pending_notify, user)) + return -EFAULT; + return sizeof(cpu->pending_notify); + } } /* Check for signals */ diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 040cb70780e..32fefdc6ad3 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -82,6 +82,16 @@ struct lg_cpu { struct lg_cpu_arch arch; }; +struct lg_eventfd { + unsigned long addr; + struct file *event; +}; + +struct lg_eventfd_map { + unsigned int num; + struct lg_eventfd map[]; +}; + /* The private info the thread maintains about the guest. */ struct lguest { @@ -102,6 +112,8 @@ struct lguest unsigned int stack_pages; u32 tsc_khz; + struct lg_eventfd_map *eventfds; + /* Dead? */ const char *dead; }; @@ -154,6 +166,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state, void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def); void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); +bool send_notify_to_eventfd(struct lg_cpu *cpu); void init_clockdev(struct lg_cpu *cpu); bool check_syscall_vector(struct lguest *lg); int init_interrupts(void); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 1982b45bd93..f6bf255f183 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "lg.h" /*L:055 When something happens, the Waker process needs a way to stop the @@ -35,6 +37,81 @@ static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input) } } +bool send_notify_to_eventfd(struct lg_cpu *cpu) +{ + unsigned int i; + struct lg_eventfd_map *map; + + /* lg->eventfds is RCU-protected */ + rcu_read_lock(); + map = rcu_dereference(cpu->lg->eventfds); + for (i = 0; i < map->num; i++) { + if (map->map[i].addr == cpu->pending_notify) { + eventfd_signal(map->map[i].event, 1); + cpu->pending_notify = 0; + break; + } + } + rcu_read_unlock(); + return cpu->pending_notify == 0; +} + +static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) +{ + struct lg_eventfd_map *new, *old = lg->eventfds; + + if (!addr) + return -EINVAL; + + /* Replace the old array with the new one, carefully: others can + * be accessing it at the same time */ + new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), + GFP_KERNEL); + if (!new) + return -ENOMEM; + + /* First make identical copy. */ + memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); + new->num = old->num; + + /* Now append new entry. */ + new->map[new->num].addr = addr; + new->map[new->num].event = eventfd_fget(fd); + if (IS_ERR(new->map[new->num].event)) { + kfree(new); + return PTR_ERR(new->map[new->num].event); + } + new->num++; + + /* Now put new one in place. */ + rcu_assign_pointer(lg->eventfds, new); + + /* We're not in a big hurry. Wait until noone's looking at old + * version, then delete it. */ + synchronize_rcu(); + kfree(old); + + return 0; +} + +static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) +{ + unsigned long addr, fd; + int err; + + if (get_user(addr, input) != 0) + return -EFAULT; + input++; + if (get_user(fd, input) != 0) + return -EFAULT; + + mutex_lock(&lguest_lock); + err = add_eventfd(lg, addr, fd); + mutex_unlock(&lguest_lock); + + return 0; +} + /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt * number to /dev/lguest. */ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) @@ -184,6 +261,13 @@ static int initialize(struct file *file, const unsigned long __user *input) goto unlock; } + lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); + if (!lg->eventfds) { + err = -ENOMEM; + goto free_lg; + } + lg->eventfds->num = 0; + /* Populate the easy fields of our "struct lguest" */ lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; @@ -191,7 +275,7 @@ static int initialize(struct file *file, const unsigned long __user *input) /* This is the first cpu (cpu 0) and it will start booting at args[2] */ err = lg_cpu_start(&lg->cpus[0], 0, args[2]); if (err) - goto release_guest; + goto free_eventfds; /* Initialize the Guest's shadow page tables, using the toplevel * address the Launcher gave us. This allocates memory, so can fail. */ @@ -210,7 +294,9 @@ static int initialize(struct file *file, const unsigned long __user *input) free_regs: /* FIXME: This should be in free_vcpu */ free_page(lg->cpus[0].regs_page); -release_guest: +free_eventfds: + kfree(lg->eventfds); +free_lg: kfree(lg); unlock: mutex_unlock(&lguest_lock); @@ -260,6 +346,8 @@ static ssize_t write(struct file *file, const char __user *in, return user_send_irq(cpu, input); case LHREQ_BREAK: return break_guest_out(cpu, input); + case LHREQ_EVENTFD: + return attach_eventfd(lg, input); default: return -EINVAL; } @@ -297,6 +385,12 @@ static int close(struct inode *inode, struct file *file) * the Launcher's memory management structure. */ mmput(lg->cpus[i].mm); } + + /* Release any eventfds they registered. */ + for (i = 0; i < lg->eventfds->num; i++) + fput(lg->eventfds->map[i].event); + kfree(lg->eventfds); + /* If lg->dead doesn't contain an error code it will be NULL or a * kmalloc()ed string, either of which is ok to hand to kfree(). */ if (!IS_ERR(lg->dead)) diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index a53407a4165..9de964b9058 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -58,6 +58,7 @@ enum lguest_req LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ + LHREQ_EVENTFD, /* + address, fd. */ }; /* The alignment to use between consumer and producer parts of vring. -- cgit v1.2.3-70-g09d2 From 5dac051bc6030963181b69faddd9e0ad04f85fa8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:10 -0600 Subject: lguest: remove obsolete LHREQ_BREAK call We no longer need an efficient mechanism to force the Guest back into host userspace, as each device is serviced without bothering the main Guest process (aka. the Launcher). Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 11 +++-------- drivers/lguest/lg.h | 4 +--- drivers/lguest/lguest_user.c | 31 ------------------------------- include/linux/lguest_launcher.h | 2 +- 4 files changed, 5 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 508569c9571..a6974e9b8eb 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -209,10 +209,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (signal_pending(current)) return -ERESTARTSYS; - /* If Waker set break_out, return to Launcher. */ - if (cpu->break_out) - return -EAGAIN; - /* Check if there are any interrupts which can be delivered now: * if so, this sets up the hander to be executed when we next * run the Guest. */ @@ -231,13 +227,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) break; /* If the Guest asked to be stopped, we sleep. The Guest's - * clock timer or LHREQ_BREAK from the Waker will wake us. */ + * clock timer will wake us. */ if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); - /* Just before we sleep, make sure nothing snuck in + /* Just before we sleep, make sure no interrupt snuck in * which we should be doing. */ - if (interrupt_pending(cpu, &more) < LGUEST_IRQS - || cpu->break_out) + if (interrupt_pending(cpu, &more) < LGUEST_IRQS) set_current_state(TASK_RUNNING); else schedule(); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 32fefdc6ad3..d4e8979735c 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -71,9 +71,7 @@ struct lg_cpu { /* Virtual clock device */ struct hrtimer hrt; - /* Do we need to stop what we're doing and return to userspace? */ - int break_out; - wait_queue_head_t break_wq; + /* Did the Guest tell us to halt? */ int halted; /* Pending virtual interrupts */ diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index f6bf255f183..32e29712105 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -11,32 +11,6 @@ #include #include "lg.h" -/*L:055 When something happens, the Waker process needs a way to stop the - * kernel running the Guest and return to the Launcher. So the Waker writes - * LHREQ_BREAK and the value "1" to /dev/lguest to do this. Once the Launcher - * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release - * the Waker. */ -static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input) -{ - unsigned long on; - - /* Fetch whether they're turning break on or off. */ - if (get_user(on, input) != 0) - return -EFAULT; - - if (on) { - cpu->break_out = 1; - if (!wake_up_process(cpu->tsk)) - kick_process(cpu->tsk); - /* Wait for them to reset it */ - return wait_event_interruptible(cpu->break_wq, !cpu->break_out); - } else { - cpu->break_out = 0; - wake_up(&cpu->break_wq); - return 0; - } -} - bool send_notify_to_eventfd(struct lg_cpu *cpu) { unsigned int i; @@ -202,9 +176,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * address. */ lguest_arch_setup_regs(cpu, start_ip); - /* Initialize the queue for the Waker to wait on */ - init_waitqueue_head(&cpu->break_wq); - /* We keep a pointer to the Launcher task (ie. current task) for when * other Guests want to wake this one (eg. console input). */ cpu->tsk = current; @@ -344,8 +315,6 @@ static ssize_t write(struct file *file, const char __user *in, return initialize(file, input); case LHREQ_IRQ: return user_send_irq(cpu, input); - case LHREQ_BREAK: - return break_guest_out(cpu, input); case LHREQ_EVENTFD: return attach_eventfd(lg, input); default: diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 9de964b9058..bfefbdf7498 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -57,7 +57,7 @@ enum lguest_req LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ - LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ + LHREQ_BREAK, /* No longer used */ LHREQ_EVENTFD, /* + address, fd. */ }; -- cgit v1.2.3-70-g09d2 From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 12 Jun 2009 14:03:06 +0300 Subject: slab,slub: don't enable interrupts during early boot As explained by Benjamin Herrenschmidt: Oh and btw, your patch alone doesn't fix powerpc, because it's missing a whole bunch of GFP_KERNEL's in the arch code... You would have to grep the entire kernel for things that check slab_is_available() and even then you'll be missing some. For example, slab_is_available() didn't always exist, and so in the early days on powerpc, we used a mem_init_done global that is set form mem_init() (not perfect but works in practice). And we still have code using that to do the test. Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators in early boot code to avoid enabling interrupts. Signed-off-by: Pekka Enberg --- include/linux/gfp.h | 3 +++ include/linux/slab.h | 2 ++ include/linux/slob_def.h | 5 +++++ include/linux/slub_def.h | 2 ++ init/main.c | 1 + mm/slab.c | 18 ++++++++++++++++++ mm/slub.c | 16 ++++++++++++++++ 7 files changed, 47 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f5453..3760e7c5de0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,6 +85,9 @@ struct vm_area_struct; __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_NOMEMALLOC) +/* Control slab gfp mask during early boot */ +#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) + /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064ced..219b8fb4651 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 0ec00b39d00..bb5368df4be 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } +static inline void kmem_cache_init_late(void) +{ + /* Nothing to do */ +} + #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index be5d40c43bd..4dcbc2c7149 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/init/main.c b/init/main.c index b3e8f14c568..f6204f712e7 100644 --- a/init/main.c +++ b/init/main.c @@ -640,6 +640,7 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + kmem_cache_init_late(); /* * HACK ALERT! This is early. We're enabling the console before diff --git a/mm/slab.c b/mm/slab.c index cd76964b53b..453efcb1c98 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -303,6 +303,12 @@ struct kmem_list3 { int free_touched; /* updated without locking */ }; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* * Need this for bootstrapping a per node allocator. */ @@ -1654,6 +1660,14 @@ void __init kmem_cache_init(void) */ } +void __init kmem_cache_init_late(void) +{ + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; +} + static int __init cpucache_init(void) { int cpu; @@ -3354,6 +3368,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, unsigned long save_flags; void *ptr; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) @@ -3434,6 +3450,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) unsigned long save_flags; void *objp; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) diff --git a/mm/slub.c b/mm/slub.c index 3964d3ce4c1..30354bfeb43 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -178,6 +178,12 @@ static enum { SYSFS /* Sysfs up */ } slab_state = DOWN; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* A list of all slab caches on the system */ static DECLARE_RWSEM(slub_lock); static LIST_HEAD(slab_caches); @@ -1595,6 +1601,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, unsigned long flags; unsigned int objsize; + gfpflags &= slab_gfp_mask; + lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); @@ -3104,6 +3112,14 @@ void __init kmem_cache_init(void) nr_cpu_ids, nr_node_ids); } +void __init kmem_cache_init_late(void) +{ + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; +} + /* * Find a mergeable slab cache */ -- cgit v1.2.3-70-g09d2 From 7ea2ac9b6632038377cb488c7d1cb60b88164d4d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 14 Apr 2009 23:14:17 -0300 Subject: Trivial: fix typo s/balence/balance/ Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- include/linux/ultrasound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ultrasound.h b/include/linux/ultrasound.h index 6b7703e75ce..71339dc531c 100644 --- a/include/linux/ultrasound.h +++ b/include/linux/ultrasound.h @@ -34,7 +34,7 @@ * _GUS_VOICEOFF - Stops voice (no parameters) * _GUS_VOICEFADE - Stops the voice smoothly. * _GUS_VOICEMODE - Alters the voice mode, don't start or stop voice (P1=voice mode) - * _GUS_VOICEBALA - Sets voice balence (P1, 0=left, 7=middle and 15=right, default 7) + * _GUS_VOICEBALA - Sets voice balance (P1, 0=left, 7=middle and 15=right, default 7) * _GUS_VOICEFREQ - Sets voice (sample) playback frequency (P1=Hz) * _GUS_VOICEVOL - Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off) * _GUS_VOICEVOL2 - Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off) -- cgit v1.2.3-70-g09d2 From 638772c7553f6893f7b346bfee4d46851af59afc Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 11 Feb 2009 17:25:24 +0800 Subject: fb: add support of LCD display controller on pxa168/910 (base layer) This driver is originally written by Lennert, modified by Green to be feature complete, and ported by Jun Nie and Kevin Liu for pxa168/910 processors. The patch adds support for the on-chip LCD display controller, it currently supports the base (graphics) layer only. Signed-off-by: Lennert Buytenhek Signed-off-by: Green Wan Cc: Peter Liao Signed-off-by: Jun Nie Signed-off-by: Kevin Liu Acked-by: Krzysztof Helt Signed-off-by: Eric Miao --- drivers/video/Kconfig | 10 + drivers/video/Makefile | 1 + drivers/video/pxa168fb.c | 803 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/video/pxa168fb.h | 558 ++++++++++++++++++++++++++++++++ include/video/pxa168fb.h | 127 ++++++++ 5 files changed, 1499 insertions(+) create mode 100644 drivers/video/pxa168fb.c create mode 100644 drivers/video/pxa168fb.h create mode 100644 include/video/pxa168fb.h (limited to 'include') diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 0048f1185a6..13fd66a1f10 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1759,6 +1759,16 @@ config FB_68328 Say Y here if you want to support the built-in frame buffer of the Motorola 68328 CPU family. +config FB_PXA168 + tristate "PXA168/910 LCD framebuffer support" + depends on FB && (CPU_PXA168 || CPU_PXA910) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for the built-in LCD controller in the Marvell + MMP processor. + config FB_PXA tristate "PXA LCD framebuffer support" depends on FB && ARCH_PXA diff --git a/drivers/video/Makefile b/drivers/video/Makefile index d8d0be5151e..01a819f4737 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -97,6 +97,7 @@ obj-$(CONFIG_FB_GBE) += gbefb.o obj-$(CONFIG_FB_CIRRUS) += cirrusfb.o obj-$(CONFIG_FB_ASILIANT) += asiliantfb.o obj-$(CONFIG_FB_PXA) += pxafb.o +obj-$(CONFIG_FB_PXA168) += pxa168fb.o obj-$(CONFIG_FB_W100) += w100fb.o obj-$(CONFIG_FB_TMIO) += tmiofb.o obj-$(CONFIG_FB_AU1100) += au1100fb.o diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c new file mode 100644 index 00000000000..84d8327e47d --- /dev/null +++ b/drivers/video/pxa168fb.c @@ -0,0 +1,803 @@ +/* + * linux/drivers/video/pxa168fb.c -- Marvell PXA168 LCD Controller + * + * Copyright (C) 2008 Marvell International Ltd. + * All rights reserved. + * + * 2009-02-16 adapted from original version for PXA168/910 + * Jun Nie + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include